svn commit: r198524 - in stable/6/sys: . conf contrib/pf dev/cxgb dev/mxge

Andrew Gallatin gallatin at FreeBSD.org
Tue Oct 27 18:30:56 UTC 2009


Author: gallatin
Date: Tue Oct 27 18:30:56 2009
New Revision: 198524
URL: http://svn.freebsd.org/changeset/base/198524

Log:
  MFC: recent mxge watchdog improvements:
  
  194909: add a dying flag (as a dependency)
  198250: Move mxge(4)'s NIC watchdog reset handler to a taskqueue
  198303: Check config space to react more quickly to failures

Modified:
  stable/6/sys/   (props changed)
  stable/6/sys/conf/   (props changed)
  stable/6/sys/contrib/pf/   (props changed)
  stable/6/sys/dev/cxgb/   (props changed)
  stable/6/sys/dev/mxge/if_mxge.c
  stable/6/sys/dev/mxge/if_mxge_var.h

Modified: stable/6/sys/dev/mxge/if_mxge.c
==============================================================================
--- stable/6/sys/dev/mxge/if_mxge.c	Tue Oct 27 18:30:26 2009	(r198523)
+++ stable/6/sys/dev/mxge/if_mxge.c	Tue Oct 27 18:30:56 2009	(r198524)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/sx.h>
+#include <sys/taskqueue.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
@@ -3368,7 +3369,6 @@ mxge_open(mxge_softc_t *sc)
 	}
 	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
 
 	return 0;
 
@@ -3385,15 +3385,13 @@ mxge_close(mxge_softc_t *sc, int down)
 	mxge_cmd_t cmd;
 	int err, old_down_cnt;
 
-	callout_stop(&sc->co_hdl);
 	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	if (!down) {
 		old_down_cnt = sc->down_cnt;
 		mb();
 		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
 		if (err) {
-			device_printf(sc->dev,
-				      "Couldn't bring down link\n");
+			device_printf(sc->dev, "Couldn't bring down link\n");
 		}
 		if (old_down_cnt == sc->down_cnt) {
 			/* wait for down irq */
@@ -3458,7 +3456,7 @@ mxge_read_reboot(mxge_softc_t *sc)
 	return (pci_read_config(dev, vs + 0x14, 4));
 }
 
-static int
+static void
 mxge_watchdog_reset(mxge_softc_t *sc)
 {
 	struct pci_devinfo *dinfo;
@@ -3489,7 +3487,6 @@ mxge_watchdog_reset(mxge_softc_t *sc)
 		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
 		if (cmd == 0xffff) {
 			device_printf(sc->dev, "NIC disappeared!\n");
-			return (err);
 		}
 	}
 	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
@@ -3548,18 +3545,40 @@ mxge_watchdog_reset(mxge_softc_t *sc)
 		}
 		sc->watchdog_resets++;
 	} else {
-		device_printf(sc->dev, "NIC did not reboot, ring state:\n");
-		device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
-			      sc->ss->tx.req, sc->ss->tx.done);
-		device_printf(sc->dev, "pkt_done=%d fw=%d\n",
-			      sc->ss->tx.pkt_done,
-			      be32toh(sc->ss->fw_stats->send_done_count));
-		device_printf(sc->dev, "not resetting\n");
+		device_printf(sc->dev,
+			      "NIC did not reboot, not resetting\n");
+		err = 0;
 	}
-	if (err)
+	if (err) {
 		device_printf(sc->dev, "watchdog reset failed\n");
+	} else {
+		if (sc->dying == 2)
+			sc->dying = 0;
+		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
+	}
+}
 
-	return (err);
+static void
+mxge_watchdog_task(void *arg, int pending)
+{
+	mxge_softc_t *sc = arg;
+
+
+	mtx_lock(&sc->driver_mtx);
+	mxge_watchdog_reset(sc);
+	mtx_unlock(&sc->driver_mtx);
+}
+
+static void
+mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
+{
+	tx = &sc->ss[slice].tx;
+	device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
+	device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
+		      tx->req, tx->done);
+	device_printf(sc->dev, "pkt_done=%d fw=%d\n",
+		      tx->pkt_done,
+		      be32toh(sc->ss->fw_stats->send_done_count));
 }
 
 static int
@@ -3575,11 +3594,14 @@ mxge_watchdog(mxge_softc_t *sc)
 	    tx->watchdog_req != tx->watchdog_done &&
 	    tx->done == tx->watchdog_done) {
 		/* check for pause blocking before resetting */
-		if (tx->watchdog_rx_pause == rx_pause)
-			err = mxge_watchdog_reset(sc);
-		else
+		if (tx->watchdog_rx_pause == rx_pause) {
+			mxge_warn_stuck(sc, tx, 0);
+			taskqueue_enqueue(sc->tq, &sc->watchdog_task);
+			return (ENXIO);
+		} else {
 			device_printf(sc->dev, "Flow control blocking "
 				      "xmits, check link partner\n");
+		}
 	}
 
 	tx->watchdog_req = tx->req;
@@ -3591,7 +3613,7 @@ mxge_watchdog(mxge_softc_t *sc)
 	return (err);
 }
 
-static void
+static u_long
 mxge_update_stats(mxge_softc_t *sc)
 {
 	struct mxge_slice_state *ss;
@@ -3603,23 +3625,45 @@ mxge_update_stats(mxge_softc_t *sc)
 		ipackets += ss->ipackets;
 	}
 	sc->ifp->if_ipackets = ipackets;
-
+	return ipackets;
 }
+
 static void
 mxge_tick(void *arg)
 {
 	mxge_softc_t *sc = arg;
+	u_long pkts = 0;
 	int err = 0;
+	int running, ticks;
+	uint16_t cmd;
 
-	/* aggregate stats from different slices */
-	mxge_update_stats(sc);
-	if (!sc->watchdog_countdown) {
-		err = mxge_watchdog(sc);
-		sc->watchdog_countdown = 4;
+	ticks = mxge_ticks;
+	mtx_lock(&sc->driver_mtx);
+	running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
+	mtx_unlock(&sc->driver_mtx);
+	if (running) {
+		/* aggregate stats from different slices */
+		pkts = mxge_update_stats(sc);
+		if (!sc->watchdog_countdown) {
+			err = mxge_watchdog(sc);
+			sc->watchdog_countdown = 4;
+		}
+		sc->watchdog_countdown--;
+	}
+	if (pkts == 0) {
+		/* ensure NIC did not suffer h/w fault while idle */
+		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);		
+		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
+			sc->dying = 2;
+			taskqueue_enqueue(sc->tq, &sc->watchdog_task);
+			err = ENXIO;
+		}
+		/* look less often if NIC is idle */
+		ticks *= 4;
 	}
-	sc->watchdog_countdown--;
+
 	if (err == 0)
-		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
+		callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
 
 }
 
@@ -3690,6 +3734,10 @@ mxge_ioctl(struct ifnet *ifp, u_long com
 
 	case SIOCSIFFLAGS:
 		mtx_lock(&sc->driver_mtx);
+		if (sc->dying) {
+			mtx_unlock(&sc->driver_mtx);
+			return EINVAL;
+		}
 		if (ifp->if_flags & IFF_UP) {
 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 				err = mxge_open(sc);
@@ -4212,6 +4260,17 @@ mxge_attach(device_t dev)
 	sc->dev = dev;
 	mxge_fetch_tunables(sc);
 
+	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
+	sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK,
+				       taskqueue_thread_enqueue,
+				       &sc->tq);
+	if (sc->tq == NULL) {
+		err = ENOMEM;
+		goto abort_with_nothing;
+	}
+	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
+				device_get_nameunit(sc->dev));
+
 	err = bus_dma_tag_create(NULL,			/* parent */
 				 1,			/* alignment */
 				 0,			/* boundary */
@@ -4228,7 +4287,7 @@ mxge_attach(device_t dev)
 	if (err != 0) {
 		device_printf(sc->dev, "Err %d allocating parent dmat\n",
 			      err);
-		goto abort_with_nothing;
+		goto abort_with_tq;
 	}
 
 	ifp = sc->ifp = if_alloc(IFT_ETHER);
@@ -4355,12 +4414,14 @@ mxge_attach(device_t dev)
 		     mxge_media_status);
 	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
 	mxge_media_probe(sc);
+	sc->dying = 0;
 	ether_ifattach(ifp, sc->mac_addr);
 	/* ether_ifattach sets mtu to 1500 */
 	if (ifp->if_capabilities & IFCAP_JUMBO_MTU)
 		ifp->if_mtu = 9000;
 
 	mxge_add_sysctls(sc);
+	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
 	return 0;
 
 abort_with_rings:
@@ -4382,7 +4443,12 @@ abort_with_lock:
 	if_free(ifp);
 abort_with_parent_dmat:
 	bus_dma_tag_destroy(sc->parent_dmat);
-
+abort_with_tq:
+	if (sc->tq != NULL) {
+		taskqueue_drain(sc->tq, &sc->watchdog_task);
+		taskqueue_free(sc->tq);
+		sc->tq = NULL;
+	}
 abort_with_nothing:
 	return err;
 }
@@ -4398,10 +4464,16 @@ mxge_detach(device_t dev)
 		return EBUSY;
 	}
 	mtx_lock(&sc->driver_mtx);
+	sc->dying = 1;
 	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
 		mxge_close(sc, 0);
 	mtx_unlock(&sc->driver_mtx);
 	ether_ifdetach(sc->ifp);
+	if (sc->tq != NULL) {
+		taskqueue_drain(sc->tq, &sc->watchdog_task);
+		taskqueue_free(sc->tq);
+		sc->tq = NULL;
+	}
 	callout_drain(&sc->co_hdl);
 	ifmedia_removeall(&sc->media);
 	mxge_dummy_rdma(sc, 0);

Modified: stable/6/sys/dev/mxge/if_mxge_var.h
==============================================================================
--- stable/6/sys/dev/mxge/if_mxge_var.h	Tue Oct 27 18:30:26 2009	(r198523)
+++ stable/6/sys/dev/mxge/if_mxge_var.h	Tue Oct 27 18:30:56 2009	(r198524)
@@ -247,8 +247,11 @@ struct mxge_softc {
 	int need_media_probe;
 	int num_slices;
 	int rx_ring_size;
+	int dying;
 	mxge_dma_t dmabench_dma;
 	struct callout co_hdl;
+	struct taskqueue *tq;
+	struct task watchdog_task;
 	struct sysctl_oid *slice_sysctl_tree;
 	struct sysctl_ctx_list slice_sysctl_ctx;
 	char *mac_addr_string;


More information about the svn-src-stable-6 mailing list