svn commit: r194521 - in head/sys: conf dev/cxgb dev/cxgb/common dev/cxgb/sys modules/cxgb/cxgb

Kip Macy kmacy at FreeBSD.org
Fri Jun 19 23:34:33 UTC 2009


Author: kmacy
Date: Fri Jun 19 23:34:32 2009
New Revision: 194521
URL: http://svn.freebsd.org/changeset/base/194521

Log:
  Greatly simplify cxgb by removing almost all of the custom mbuf management logic
  
  - remove mbuf iovec - useful, but adds too much complexity when isolated to
     the driver
  
  - remove driver private caching - insufficient benefit over UMA to justify
    the added complexity and maintenance overhead
  
  - remove separate logic for managing multiple transmit queues; with the
    new drbr routines, the control flow can be made to much more closely resemble
    legacy drivers
  
  - remove dedicated service threads; with per-cpu callouts, one can get the same
    benefit much more simply by registering a callout 1 tick in the future if there
    are still buffered packets
  
  - remove embedded mbuf usage - Jeffr's changes will (I hope) soon be integrated,
    greatly reducing the overhead of using kernel APIs for reference-counting
    clusters
  
  - add hysteresis to descriptor coalescing logic
  
  - add coalesce threshold sysctls to allow users to decide at run-time
    between optimizing for forwarding / UDP or optimizing for TCP
  
  - add a once-per-second watchdog to effectively close the very rare races
    occurring from coalescing
  
  - incorporate Navdeep's changes to the initialization path required to
    convert port and adapter locks back to ordinary mutexes (silencing BPF
    LOR complaints)
  
  - enable prefetches in get_packet and tx cleaning
  
  Reviewed by:	navdeep@
  MFC after:	2 weeks

Deleted:
  head/sys/dev/cxgb/cxgb_multiq.c
  head/sys/dev/cxgb/sys/cxgb_support.c
Modified:
  head/sys/conf/files
  head/sys/dev/cxgb/common/cxgb_t3_cpl.h
  head/sys/dev/cxgb/cxgb_adapter.h
  head/sys/dev/cxgb/cxgb_main.c
  head/sys/dev/cxgb/cxgb_osdep.h
  head/sys/dev/cxgb/cxgb_sge.c
  head/sys/dev/cxgb/sys/mvec.h
  head/sys/dev/cxgb/sys/uipc_mvec.c
  head/sys/modules/cxgb/cxgb/Makefile

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Fri Jun 19 23:30:09 2009	(r194520)
+++ head/sys/conf/files	Fri Jun 19 23:34:32 2009	(r194521)
@@ -756,8 +756,6 @@ dev/cxgb/cxgb_offload.c		optional cxgb p
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/cxgb_sge.c		optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
-dev/cxgb/cxgb_multiq.c		optional cxgb pci \
-	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_mc5.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_vsc7323.c	optional cxgb pci \
@@ -776,8 +774,6 @@ dev/cxgb/common/cxgb_tn1010.c	optional c
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"	
 dev/cxgb/sys/uipc_mvec.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
-dev/cxgb/sys/cxgb_support.c	optional cxgb pci \
-	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/cxgb_t3fw.c		optional cxgb cxgb_t3fw \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cy/cy.c			optional cy

Modified: head/sys/dev/cxgb/common/cxgb_t3_cpl.h
==============================================================================
--- head/sys/dev/cxgb/common/cxgb_t3_cpl.h	Fri Jun 19 23:30:09 2009	(r194520)
+++ head/sys/dev/cxgb/common/cxgb_t3_cpl.h	Fri Jun 19 23:34:32 2009	(r194521)
@@ -237,10 +237,21 @@ struct rss_header {
 
 #ifndef CHELSIO_FW
 struct work_request_hdr {
-	__be32 wr_hi;
-	__be32 wr_lo;
+	union {
+		struct {
+			__be32 wr_hi;
+			__be32 wr_lo;
+		} ilp32;
+		struct {
+			__be64 wr_hilo;
+		} lp64;
+	} u;
 };
 
+#define	wrh_hi		u.ilp32.wr_hi
+#define	wrh_lo		u.ilp32.wr_lo
+#define	wrh_hilo	u.lp64.wr_hilo
+
 /* wr_hi fields */
 #define S_WR_SGE_CREDITS    0
 #define M_WR_SGE_CREDITS    0xFF
@@ -817,8 +828,7 @@ struct cpl_peer_close {
 };
 
 struct tx_data_wr {
-	__be32 wr_hi;
-	__be32 wr_lo;
+	WR_HDR;
 	__be32 len;
 	__be32 flags;
 	__be32 sndseq;
@@ -936,8 +946,7 @@ struct cpl_rdma_ec_status {
 };
 
 struct mngt_pktsched_wr {
-	__be32 wr_hi;
-	__be32 wr_lo;
+	WR_HDR;
 	__u8  mngt_opcode;
 	__u8  rsvd[7];
 	__u8  sched;

Modified: head/sys/dev/cxgb/cxgb_adapter.h
==============================================================================
--- head/sys/dev/cxgb/cxgb_adapter.h	Fri Jun 19 23:30:09 2009	(r194520)
+++ head/sys/dev/cxgb/cxgb_adapter.h	Fri Jun 19 23:34:32 2009	(r194521)
@@ -35,7 +35,6 @@ $FreeBSD$
 
 #include <sys/lock.h>
 #include <sys/mutex.h>
-#include <sys/sx.h>
 #include <sys/rman.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
@@ -63,8 +62,6 @@ $FreeBSD$
 #include <netinet/tcp_lro.h>
 #endif
 
-#define USE_SX
-
 struct adapter;
 struct sge_qset;
 extern int cxgb_debug;
@@ -82,22 +79,9 @@ extern int cxgb_debug;
 		mtx_destroy((lock));					\
 	} while (0)
 
-#define SX_INIT(lock, lockname) \
-	do { \
-		printf("initializing %s at %s:%d\n", lockname, __FILE__, __LINE__); \
-		sx_init((lock), lockname);		\
-	} while (0)
-
-#define SX_DESTROY(lock) \
-	do { \
-		printf("destroying %s at %s:%d\n", (lock)->lock_object.lo_name, __FILE__, __LINE__); \
-		sx_destroy((lock));					\
-	} while (0)
 #else
 #define MTX_INIT mtx_init
 #define MTX_DESTROY mtx_destroy
-#define SX_INIT sx_init
-#define SX_DESTROY sx_destroy
 #endif
 
 enum {
@@ -110,20 +94,17 @@ struct port_info {
 	struct adapter	*adapter;
 	struct ifnet	*ifp;
 	int		if_flags;
+	int		flags;
 	const struct port_type_info *port_type;
 	struct cphy	phy;
 	struct cmac	mac;
 	struct link_config link_config;
 	struct ifmedia	media;
-#ifdef USE_SX	
-	struct sx	lock;
-#else	
 	struct mtx	lock;
-#endif	
-	uint8_t		port_id;
-	uint8_t		tx_chan;
-	uint8_t		txpkt_intf;
-	uint8_t         first_qset;
+	uint32_t	port_id;
+	uint32_t	tx_chan;
+	uint32_t	txpkt_intf;
+	uint32_t        first_qset;
 	uint32_t	nqsets;
 	int		link_fault;
 
@@ -135,19 +116,30 @@ struct port_info {
 #define PORT_NAME_LEN 32
 	char            lockbuf[PORT_LOCK_NAME_LEN];
 	char            namebuf[PORT_NAME_LEN];
-};
+} __aligned(L1_CACHE_BYTES);
 
-enum {				/* adapter flags */
+enum {
+	/* adapter flags */
 	FULL_INIT_DONE	= (1 << 0),
 	USING_MSI	= (1 << 1),
 	USING_MSIX	= (1 << 2),
 	QUEUES_BOUND	= (1 << 3),
-	FW_UPTODATE     = (1 << 4),
-	TPS_UPTODATE    = (1 << 5),
+	FW_UPTODATE	= (1 << 4),
+	TPS_UPTODATE	= (1 << 5),
 	CXGB_SHUTDOWN	= (1 << 6),
 	CXGB_OFLD_INIT	= (1 << 7),
-	TP_PARITY_INIT  = (1 << 8),
+	TP_PARITY_INIT	= (1 << 8),
+	CXGB_BUSY	= (1 << 9),
+
+	/* port flags */
+	DOOMED		= (1 << 0),
 };
+#define IS_DOOMED(p)	(p->flags & DOOMED)
+#define SET_DOOMED(p)	do {p->flags |= DOOMED;} while (0)
+#define DOOMED(p)	(p->flags & DOOMED)
+#define IS_BUSY(sc)	(sc->flags & CXGB_BUSY)
+#define SET_BUSY(sc)	do {sc->flags |= CXGB_BUSY;} while (0)
+#define CLR_BUSY(sc)	do {sc->flags &= ~CXGB_BUSY;} while (0)
 
 #define FL_Q_SIZE	4096
 #define JUMBO_Q_SIZE	1024
@@ -205,10 +197,6 @@ struct sge_rspq {
 	uint32_t	rspq_dump_count;
 };
 
-#ifndef DISABLE_MBUF_IOVEC
-#define rspq_mbuf rspq_mh.mh_head
-#endif
-
 struct rx_desc;
 struct rx_sw_desc;
 
@@ -253,7 +241,6 @@ struct sge_txq {
 	bus_addr_t	phys_addr;
 	struct task     qresume_task;
 	struct task     qreclaim_task;
-	struct port_info *port;
 	uint32_t	cntxt_id;
 	uint64_t	stops;
 	uint64_t	restarts;
@@ -261,26 +248,21 @@ struct sge_txq {
 	bus_dmamap_t	desc_map;
 	bus_dma_tag_t   entry_tag;
 	struct mbuf_head sendq;
-	/*
-	 * cleanq should really be an buf_ring to avoid extra
-	 * mbuf touches
-	 */
-	struct mbuf_head cleanq;	
+
 	struct buf_ring *txq_mr;
 	struct ifaltq	*txq_ifq;
-	struct mbuf     *immpkt;
-
+	struct callout	txq_timer;
+	struct callout	txq_watchdog;
+	uint64_t        txq_coalesced;
 	uint32_t        txq_drops;
 	uint32_t        txq_skipped;
-	uint32_t        txq_coalesced;
 	uint32_t        txq_enqueued;
 	uint32_t	txq_dump_start;
 	uint32_t	txq_dump_count;
-	unsigned long   txq_frees;
-	struct mtx      lock;
+	uint64_t	txq_direct_packets;
+	uint64_t	txq_direct_bytes;	
+	uint64_t	txq_frees;
 	struct sg_ent  txq_sgl[TX_MAX_SEGS / 2 + 1];
-	#define TXQ_NAME_LEN  32
-	char            lockbuf[TXQ_NAME_LEN];
 };
      	
 
@@ -297,6 +279,8 @@ enum {
 #define QS_EXITING              0x1
 #define QS_RUNNING              0x2
 #define QS_BOUND                0x4
+#define	QS_FLUSHING		0x8
+#define	QS_TIMEOUT		0x10
 
 struct sge_qset {
 	struct sge_rspq		rspq;
@@ -309,10 +293,10 @@ struct sge_qset {
 	uint64_t                port_stats[SGE_PSTAT_MAX];
 	struct port_info        *port;
 	int                     idx; /* qset # */
-	int                     qs_cpuid;
 	int                     qs_flags;
+	int			coalescing;
 	struct cv		qs_cv;
-	struct mtx		qs_mtx;
+	struct mtx		lock;
 #define QS_NAME_LEN 32
 	char                    namebuf[QS_NAME_LEN];
 };
@@ -328,7 +312,7 @@ struct adapter {
 	device_t		dev;
 	int			flags;
 	TAILQ_ENTRY(adapter)    adapter_entry;
-	
+
 	/* PCI register resources */
 	int			regs_rid;
 	struct resource		*regs_res;
@@ -401,11 +385,7 @@ struct adapter {
 	char                    port_types[MAX_NPORTS + 1];
 	uint32_t                open_device_map;
 	uint32_t                registered_device_map;
-#ifdef USE_SX
-	struct sx               lock;
-#else	
 	struct mtx              lock;
-#endif	
 	driver_intr_t           *cxgb_intr;
 	int                     msi_count;
 
@@ -422,31 +402,17 @@ struct t3_rx_mode {
 	struct port_info        *port;
 };
 
-
 #define MDIO_LOCK(adapter)	mtx_lock(&(adapter)->mdio_lock)
 #define MDIO_UNLOCK(adapter)	mtx_unlock(&(adapter)->mdio_lock)
 #define ELMR_LOCK(adapter)	mtx_lock(&(adapter)->elmer_lock)
 #define ELMR_UNLOCK(adapter)	mtx_unlock(&(adapter)->elmer_lock)
 
 
-#ifdef USE_SX
-#define PORT_LOCK(port)		     sx_xlock(&(port)->lock);
-#define PORT_UNLOCK(port)	     sx_xunlock(&(port)->lock);
-#define PORT_LOCK_INIT(port, name)   SX_INIT(&(port)->lock, name)
-#define PORT_LOCK_DEINIT(port)       SX_DESTROY(&(port)->lock)
-#define PORT_LOCK_ASSERT_OWNED(port) sx_assert(&(port)->lock, SA_LOCKED)
-
-#define ADAPTER_LOCK(adap)	           sx_xlock(&(adap)->lock);
-#define ADAPTER_UNLOCK(adap)	           sx_xunlock(&(adap)->lock);
-#define ADAPTER_LOCK_INIT(adap, name)      SX_INIT(&(adap)->lock, name)
-#define ADAPTER_LOCK_DEINIT(adap)          SX_DESTROY(&(adap)->lock)
-#define ADAPTER_LOCK_ASSERT_NOTOWNED(adap) sx_assert(&(adap)->lock, SA_UNLOCKED)
-#define ADAPTER_LOCK_ASSERT_OWNED(adap) sx_assert(&(adap)->lock, SA_LOCKED)
-#else
 #define PORT_LOCK(port)		     mtx_lock(&(port)->lock);
 #define PORT_UNLOCK(port)	     mtx_unlock(&(port)->lock);
 #define PORT_LOCK_INIT(port, name)   mtx_init(&(port)->lock, name, 0, MTX_DEF)
 #define PORT_LOCK_DEINIT(port)       mtx_destroy(&(port)->lock)
+#define PORT_LOCK_ASSERT_NOTOWNED(port) mtx_assert(&(port)->lock, MA_NOTOWNED)
 #define PORT_LOCK_ASSERT_OWNED(port) mtx_assert(&(port)->lock, MA_OWNED)
 
 #define ADAPTER_LOCK(adap)	mtx_lock(&(adap)->lock);
@@ -455,7 +421,6 @@ struct t3_rx_mode {
 #define ADAPTER_LOCK_DEINIT(adap) mtx_destroy(&(adap)->lock)
 #define ADAPTER_LOCK_ASSERT_NOTOWNED(adap) mtx_assert(&(adap)->lock, MA_NOTOWNED)
 #define ADAPTER_LOCK_ASSERT_OWNED(adap) mtx_assert(&(adap)->lock, MA_OWNED)
-#endif
 
 
 static __inline uint32_t
@@ -555,14 +520,11 @@ void t3_sge_stop(adapter_t *);
 void t3b_intr(void *data);
 void t3_intr_msi(void *data);
 void t3_intr_msix(void *data);
-int t3_encap(struct sge_qset *, struct mbuf **, int);
 
 int t3_sge_init_adapter(adapter_t *);
 int t3_sge_reset_adapter(adapter_t *);
 int t3_sge_init_port(struct port_info *);
-void t3_sge_deinit_sw(adapter_t *);
-void t3_free_tx_desc(struct sge_txq *q, int n);
-void t3_free_tx_desc_all(struct sge_txq *q);
+void t3_free_tx_desc(struct sge_qset *qs, int n, int qid);
 
 void t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad);
 
@@ -615,13 +577,8 @@ static inline int offload_running(adapte
         return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
 }
 
-int cxgb_pcpu_enqueue_packet(struct ifnet *ifp, struct mbuf *m);
-int cxgb_pcpu_transmit(struct ifnet *ifp, struct mbuf *m);
-void cxgb_pcpu_shutdown_threads(struct adapter *sc);
-void cxgb_pcpu_startup_threads(struct adapter *sc);
-
-int process_responses(adapter_t *adap, struct sge_qset *qs, int budget);
-void t3_free_qset(adapter_t *sc, struct sge_qset *q);
+void cxgb_tx_watchdog(void *arg);
+int cxgb_transmit(struct ifnet *ifp, struct mbuf *m);
+void cxgb_qflush(struct ifnet *ifp);
 void cxgb_start(struct ifnet *ifp);
-void refill_fl_service(adapter_t *adap, struct sge_fl *fl);
 #endif

Modified: head/sys/dev/cxgb/cxgb_main.c
==============================================================================
--- head/sys/dev/cxgb/cxgb_main.c	Fri Jun 19 23:30:09 2009	(r194520)
+++ head/sys/dev/cxgb/cxgb_main.c	Fri Jun 19 23:34:32 2009	(r194521)
@@ -84,10 +84,12 @@ __FBSDID("$FreeBSD$");
 
 static int cxgb_setup_interrupts(adapter_t *);
 static void cxgb_teardown_interrupts(adapter_t *);
+static int cxgb_begin_op(struct port_info *, const char *);
+static int cxgb_begin_detach(struct port_info *);
+static int cxgb_end_op(struct port_info *);
 static void cxgb_init(void *);
-static void cxgb_init_locked(struct port_info *);
-static void cxgb_stop_locked(struct port_info *);
-static void cxgb_set_rxmode(struct port_info *);
+static int cxgb_init_synchronized(struct port_info *);
+static int cxgb_uninit_synchronized(struct port_info *);
 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
 static int cxgb_media_change(struct ifnet *);
 static int cxgb_ifm_type(int);
@@ -96,7 +98,6 @@ static int setup_sge_qsets(adapter_t *);
 static void cxgb_async_intr(void *);
 static void cxgb_ext_intr_handler(void *, int);
 static void cxgb_tick_handler(void *, int);
-static void cxgb_down_locked(struct adapter *sc);
 static void cxgb_tick(void *);
 static void setup_rss(adapter_t *sc);
 
@@ -114,7 +115,6 @@ static int cxgb_get_regs_len(void);
 static int offload_open(struct port_info *pi);
 static void touch_bars(device_t dev);
 static int offload_close(struct t3cdev *tdev);
-static void cxgb_link_start(struct port_info *p);
 int t3_detect_link_fault(adapter_t *adapter, int port_id);
 
 static device_method_t cxgb_controller_methods[] = {
@@ -722,29 +722,47 @@ cxgb_free(struct adapter *sc)
 	sc->flags |= CXGB_SHUTDOWN;
 	ADAPTER_UNLOCK(sc);
 
-	cxgb_pcpu_shutdown_threads(sc);
-
-	ADAPTER_LOCK(sc);
-	cxgb_down_locked(sc);
-	ADAPTER_UNLOCK(sc);
-	
-	t3_sge_deinit_sw(sc);
 	/*
-	 * Wait for last callout
+	 * Make sure all child devices are gone.
 	 */
-	
-	DELAY(hz*100);
-
 	bus_generic_detach(sc->dev);
-
 	for (i = 0; i < (sc)->params.nports; i++) {
 		if (sc->portdev[i] &&
 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
 			device_printf(sc->dev, "failed to delete child port\n");
 	}
 
-	cxgb_teardown_interrupts(sc);
+	/*
+	 * At this point, it is as if cxgb_port_detach has run on all ports, and
+	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
+	 * all open devices have been closed.
+	 */
+	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
+					   __func__, sc->open_device_map));
+	for (i = 0; i < sc->params.nports; i++) {
+		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
+						  __func__, i));
+	}
 
+	/*
+	 * Finish off the adapter's callouts.
+	 */
+	callout_drain(&sc->cxgb_tick_ch);
+	callout_drain(&sc->sge_timer_ch);
+
+	/*
+	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
+	 * sysctls are cleaned up by the kernel linker.
+	 */
+	if (sc->flags & FULL_INIT_DONE) {
+ 		t3_free_sge_resources(sc);
+ 		sc->flags &= ~FULL_INIT_DONE;
+ 	}
+
+	/*
+	 * Release all interrupt resources.
+	 */
+	cxgb_teardown_interrupts(sc);
 #ifdef MSI_SUPPORTED
 	if (sc->flags & (USING_MSI | USING_MSIX)) {
 		device_printf(sc->dev, "releasing msi message(s)\n");
@@ -759,26 +777,26 @@ cxgb_free(struct adapter *sc)
 	}
 #endif
 
+	/*
+	 * Free the adapter's taskqueue.
+	 */
 	if (sc->tq != NULL) {
 		taskqueue_free(sc->tq);
 		sc->tq = NULL;
 	}
 	
 	if (is_offload(sc)) {
+		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
 		cxgb_adapter_unofld(sc);
-		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
-			offload_close(&sc->tdev);
-		else
-			printf("cxgb_free: DEVMAP_BIT not set\n");
-	} else
-		printf("not offloading set\n");	
+	}
+
 #ifdef notyet
 	if (sc->flags & CXGB_OFLD_INIT)
 		cxgb_offload_deactivate(sc);
 #endif
 	free(sc->filters, M_DEVBUF);
 	t3_sge_free(sc);
-	
+
 	cxgb_offload_exit();
 
 	if (sc->udbs_res != NULL)
@@ -1052,10 +1070,9 @@ cxgb_port_attach(device_t dev)
 	}
 
 	ether_ifattach(ifp, p->hw_addr);
+	ifp->if_transmit = cxgb_transmit;
+	ifp->if_qflush = cxgb_qflush;
 
-#ifdef IFNET_MULTIQUEUE
-	ifp->if_transmit = cxgb_pcpu_transmit;
-#endif
 	/*
 	 * Only default to jumbo frames on 10GigE
 	 */
@@ -1112,15 +1129,8 @@ cxgb_port_attach(device_t dev)
 		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
 	}	
 
-	/* Get the latest mac address, User can use a LAA */
-	bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN);
 	t3_sge_init_port(p);
 
-#if defined(LINK_ATTACH)	
-	cxgb_link_start(p);
-	t3_link_changed(sc, p->port_id);
-#endif
-
 	return (err);
 }
 
@@ -1130,46 +1140,38 @@ cxgb_port_attach(device_t dev)
  * removing the device from the view of the kernel, i.e. from all 
  * interfaces lists etc.  This routine is only called when the driver is 
  * being unloaded, not when the link goes down.
- * 
  */
 static int
 cxgb_port_detach(device_t dev)
 {
 	struct port_info *p;
 	struct adapter *sc;
+	int i;
 
 	p = device_get_softc(dev);
 	sc = p->adapter;
 
+	cxgb_begin_detach(p);
+
 	if (p->port_cdev != NULL)
 		destroy_dev(p->port_cdev);
-	
+
+	cxgb_uninit_synchronized(p);
 	ether_ifdetach(p->ifp);
 
-	PORT_LOCK(p);
-	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING) 
-		cxgb_stop_locked(p);
-	PORT_UNLOCK(p);
-	
-	callout_drain(&sc->cxgb_tick_ch);
-	callout_drain(&sc->sge_timer_ch);
-	
-	if (sc->tq != NULL) {
-		printf("draining slow intr\n");
-		
-		taskqueue_drain(sc->tq, &sc->slow_intr_task);
-			printf("draining ext intr\n");	
-		taskqueue_drain(sc->tq, &sc->ext_intr_task);
-		printf("draining tick task\n");
-		taskqueue_drain(sc->tq, &sc->tick_task);
+	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
+		struct sge_qset *qs = &sc->sge.qs[i];
+		struct sge_txq *txq = &qs->txq[TXQ_ETH];
+
+		callout_drain(&txq->txq_watchdog);
+		callout_drain(&txq->txq_timer);
 	}
 
-	/*
-	 * the lock may be acquired in ifdetach
-	 */
 	PORT_LOCK_DEINIT(p);
 	if_free(p->ifp);
-	
+	p->ifp = NULL;
+
+	cxgb_end_op(p);
 	return (0);
 }
 
@@ -1276,12 +1278,16 @@ t3_os_link_changed(adapter_t *adapter, i
      int duplex, int fc)
 {
 	struct port_info *pi = &adapter->port[port_id];
+	struct ifnet *ifp = pi->ifp;
+
+	/* no race with detach, so ifp should always be good */
+	KASSERT(ifp, ("%s: if detached.", __func__));
 
 	if (link_status) {
-		pi->ifp->if_baudrate = IF_Mbps(speed);
-		if_link_state_change(pi->ifp, LINK_STATE_UP);
+		ifp->if_baudrate = IF_Mbps(speed);
+		if_link_state_change(ifp, LINK_STATE_UP);
 	} else
-		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
+		if_link_state_change(ifp, LINK_STATE_DOWN);
 }
 
 /**
@@ -1325,13 +1331,13 @@ t3_os_ext_intr_handler(adapter_t *sc)
 	 * interrupts in the meantime and let the task reenable them when
 	 * it's done.
 	 */
-	ADAPTER_LOCK(sc);
 	if (sc->slow_intr_mask) {
+		ADAPTER_LOCK(sc);
 		sc->slow_intr_mask &= ~F_T3DBG;
 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
+		ADAPTER_UNLOCK(sc);
 	}
-	ADAPTER_UNLOCK(sc);
 }
 
 void
@@ -1348,21 +1354,19 @@ t3_os_set_hw_addr(adapter_t *adapter, in
 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
 }
 
-/**
- *	link_start - enable a port
- *	@p: the port to enable
- *
- *	Performs the MAC and PHY actions needed to enable a port.
+/*
+ * Programs the XGMAC based on the settings in the ifnet.  These settings
+ * include MTU, MAC address, mcast addresses, etc.
  */
 static void
-cxgb_link_start(struct port_info *p)
+cxgb_update_mac_settings(struct port_info *p)
 {
-	struct ifnet *ifp;
+	struct ifnet *ifp = p->ifp;
 	struct t3_rx_mode rm;
 	struct cmac *mac = &p->mac;
 	int mtu, hwtagging;
 
-	ifp = p->ifp;
+	PORT_LOCK_ASSERT_OWNED(p);
 
 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
 
@@ -1372,15 +1376,11 @@ cxgb_link_start(struct port_info *p)
 
 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
 
-	t3_init_rx_mode(&rm, p);
-	if (!mac->multiport) 
-		t3_mac_reset(mac);
 	t3_mac_set_mtu(mac, mtu);
 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
 	t3_mac_set_address(mac, 0, p->hw_addr);
+	t3_init_rx_mode(&rm, p);
 	t3_mac_set_rx_mode(mac, &rm);
-	t3_link_start(&p->phy, mac, &p->link_config);
-	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
 }
 
 
@@ -1415,7 +1415,7 @@ init_tp_parity(struct adapter *adap)
 		req = mtod(m, struct cpl_smt_write_req *);
 		m->m_len = m->m_pkthdr.len = sizeof(*req);
 		memset(req, 0, sizeof(*req));
-		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
 		req->iff = i;
 		t3_mgmt_tx(adap, m);
@@ -1428,7 +1428,7 @@ init_tp_parity(struct adapter *adap)
 		req = mtod(m, struct cpl_l2t_write_req *);
 		m->m_len = m->m_pkthdr.len = sizeof(*req);
 		memset(req, 0, sizeof(*req));
-		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
 		req->params = htonl(V_L2T_W_IDX(i));
 		t3_mgmt_tx(adap, m);
@@ -1441,7 +1441,7 @@ init_tp_parity(struct adapter *adap)
 		req = mtod(m, struct cpl_rte_write_req *);
 		m->m_len = m->m_pkthdr.len = sizeof(*req);
 		memset(req, 0, sizeof(*req));
-		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
 		t3_mgmt_tx(adap, m);
@@ -1451,7 +1451,7 @@ init_tp_parity(struct adapter *adap)
 	greq = mtod(m, struct cpl_set_tcb_field *);
 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
 	memset(greq, 0, sizeof(*greq));
-	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
 	greq->mask = htobe64(1);
 	t3_mgmt_tx(adap, m);
@@ -1532,7 +1532,7 @@ write_smt_entry(struct adapter *adapter,
 	req = mtod(m, struct cpl_smt_write_req *);
 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
 	
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
 	req->iff = idx;
@@ -1559,10 +1559,8 @@ init_smt(struct adapter *adapter)
 static void
 init_port_mtus(adapter_t *adapter)
 {
-	unsigned int mtus = adapter->port[0].ifp->if_mtu;
+	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
 
-	if (adapter->port[1].ifp)
-		mtus |= adapter->port[1].ifp->if_mtu << 16;
 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
 }
 
@@ -1576,7 +1574,7 @@ send_pktsched_cmd(struct adapter *adap, 
 	m = m_gethdr(M_DONTWAIT, MT_DATA);
 	if (m) {	
 		req = mtod(m, struct mngt_pktsched_wr *);
-		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
+		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
 		req->sched = sched;
 		req->idx = qidx;
@@ -1593,7 +1591,6 @@ bind_qsets(adapter_t *sc)
 {
 	int i, j;
 
-	cxgb_pcpu_startup_threads(sc);
 	for (i = 0; i < (sc)->params.nports; ++i) {
 		const struct port_info *pi = adap2pinfo(sc, i);
 
@@ -1717,14 +1714,20 @@ cxgb_up(struct adapter *sc)
 {
 	int err = 0;
 
+	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
+					   __func__, sc->open_device_map));
+
 	if ((sc->flags & FULL_INIT_DONE) == 0) {
 
 		if ((sc->flags & FW_UPTODATE) == 0)
 			if ((err = upgrade_fw(sc)))
 				goto out;
+
 		if ((sc->flags & TPS_UPTODATE) == 0)
 			if ((err = update_tpsram(sc)))
 				goto out;
+
 		err = t3_init_hw(sc, 0);
 		if (err)
 			goto out;
@@ -1756,78 +1759,53 @@ cxgb_up(struct adapter *sc)
 		sc->flags |= TP_PARITY_INIT;
 
 	if (sc->flags & TP_PARITY_INIT) {
-		t3_write_reg(sc, A_TP_INT_CAUSE,
-				F_CMCACHEPERR | F_ARPLUTPERR);
+		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
 	}
-
 	
 	if (!(sc->flags & QUEUES_BOUND)) {
 		bind_qsets(sc);
 		sc->flags |= QUEUES_BOUND;		
 	}
+
+	t3_sge_reset_adapter(sc);
 out:
 	return (err);
 }
 
-
 /*
- * Bring down the interface but do not free any resources.
+ * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
+ * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
+ * during controller_detach, not here.
  */
 static void
-cxgb_down_locked(struct adapter *sc)
+cxgb_down(struct adapter *sc)
 {
-	
+	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+
 	t3_sge_stop(sc);
 	t3_intr_disable(sc);
-
-	callout_stop(&sc->cxgb_tick_ch);
-	callout_stop(&sc->sge_timer_ch);
 }
 
 static int
 offload_open(struct port_info *pi)
 {
-	struct adapter *adapter = pi->adapter;
-	struct t3cdev *tdev = &adapter->tdev;
+	struct adapter *sc = pi->adapter;
+	struct t3cdev *tdev = &sc->tdev;
 
-	int adap_up = adapter->open_device_map & PORT_MASK;
-	int err = 0;
+	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
 
-	if (atomic_cmpset_int(&adapter->open_device_map,
-		(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
-		(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
-		return (0);
+	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
 
-	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)) 
-		printf("offload_open: DEVMAP_BIT did not get set 0x%x\n",
-		    adapter->open_device_map);
-	ADAPTER_LOCK(pi->adapter); 
-	if (!adap_up)
-		err = cxgb_up(adapter);
-	ADAPTER_UNLOCK(pi->adapter);
-	if (err)
-		return (err);
-
-	t3_tp_set_offload_mode(adapter, 1);
+	t3_tp_set_offload_mode(sc, 1);
 	tdev->lldev = pi->ifp;
-
-	init_port_mtus(adapter);
-	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
-		     adapter->params.b_wnd,
-		     adapter->params.rev == 0 ?
-		       adapter->port[0].ifp->if_mtu : 0xffff);
-	init_smt(adapter);
-	/* Call back all registered clients */
+	init_port_mtus(sc);
+	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
+		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
+	init_smt(sc);
 	cxgb_add_clients(tdev);
 
-	/* restore them in case the offload module has changed them */
-	if (err) {
-		t3_tp_set_offload_mode(adapter, 0);
-		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
-		cxgb_set_dummy_ops(tdev);
-	}
-	return (err);
+	return (0);
 }
 
 static int
@@ -1844,147 +1822,220 @@ offload_close(struct t3cdev *tdev)
 	tdev->lldev = NULL;
 	cxgb_set_dummy_ops(tdev);
 	t3_tp_set_offload_mode(adapter, 0);
+
 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
 
-	ADAPTER_LOCK(adapter);
-	if (!adapter->open_device_map)
-		cxgb_down_locked(adapter);
+	return (0);
+}
+
+/*
+ * Begin a synchronized operation.  If this call succeeds, it is guaranteed that
+ * no one will remove the port or its ifp from underneath the caller.  Caller is
+ * also granted exclusive access to open_device_map.
+ *
+ * operation here means init, uninit, detach, and ioctl service.
+ *
+ * May fail.
+ * EINTR (ctrl-c pressed during ifconfig for example).
+ * ENXIO (port is about to detach - due to kldunload for example).
+ */
+int
+cxgb_begin_op(struct port_info *p, const char *wmsg)
+{
+	int rc = 0;
+	struct adapter *sc = p->adapter;
+
+	ADAPTER_LOCK(sc);
+
+	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
+		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, wmsg, 0)) {
+			rc = EINTR;
+			goto done;
+		}
+	}
+
+	if (IS_DOOMED(p))
+		rc = ENXIO;
+	else if (!IS_BUSY(sc))
+		SET_BUSY(sc);
+	else {
+		KASSERT(0, ("%s: port %d, p->flags = %x , sc->flags = %x",
+			    __func__, p->port_id, p->flags, sc->flags));
+		rc = EDOOFUS;
+	}
+
+done:
+	ADAPTER_UNLOCK(sc);
+	return (rc);
+}
 
-	ADAPTER_UNLOCK(adapter);
+/*
+ * End a synchronized operation.  Read comment block above cxgb_begin_op.
+ */
+int
+cxgb_end_op(struct port_info *p)
+{
+	struct adapter *sc = p->adapter;
+
+	ADAPTER_LOCK(sc);
+	KASSERT(IS_BUSY(sc), ("%s: not busy.", __func__));
+	CLR_BUSY(sc);
+	wakeup_one(&sc->flags);
+	ADAPTER_UNLOCK(sc);
 
 	return (0);
 }
 
+/*
+ * Prepare for port detachment.  Detach is a special kind of synchronized
+ * operation.  Also read comment before cxgb_begin_op.
+ */
+static int
+cxgb_begin_detach(struct port_info *p)
+{
+	struct adapter *sc = p->adapter;
 
+	/*
+	 * Inform those waiting for this port that it is going to be destroyed
+	 * and they should not continue further.  (They'll return with ENXIO).
+	 */
+	ADAPTER_LOCK(sc);
+	SET_DOOMED(p);
+	wakeup(&sc->flags);
+	ADAPTER_UNLOCK(sc);
+
+	/*
+	 * Wait for in-progress operations.
+	 */
+	ADAPTER_LOCK(sc);
+	while (IS_BUSY(sc)) {
+		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
+	}
+	SET_BUSY(sc);
+	ADAPTER_UNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * if_init for cxgb ports.
+ */
 static void
 cxgb_init(void *arg)
 {
 	struct port_info *p = arg;
 
-	PORT_LOCK(p);
-	cxgb_init_locked(p);
-	PORT_UNLOCK(p);
+	if (cxgb_begin_op(p, "cxgbinit"))
+		return;
+
+	cxgb_init_synchronized(p);
+	cxgb_end_op(p);
 }
 
-static void
-cxgb_init_locked(struct port_info *p)
+static int
+cxgb_init_synchronized(struct port_info *p)
 {
-	struct ifnet *ifp;
-	adapter_t *sc = p->adapter;
-	int err;
-
-	PORT_LOCK_ASSERT_OWNED(p);
-	ifp = p->ifp;
+	struct adapter *sc = p->adapter;
+	struct ifnet *ifp = p->ifp;
+	struct cmac *mac = &p->mac;
+	int i, rc;
 
-	ADAPTER_LOCK(p->adapter);
-	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
-		ADAPTER_UNLOCK(p->adapter);
-		cxgb_stop_locked(p);
-		return;
-	}
-	if (p->adapter->open_device_map == 0) {
-		t3_intr_clear(sc);
-	}
-	setbit(&p->adapter->open_device_map, p->port_id);
-	ADAPTER_UNLOCK(p->adapter);
+	if (sc->open_device_map == 0) {
+		if ((rc = cxgb_up(sc)) != 0)
+			return (rc);
 
-	if (is_offload(sc) && !ofld_disable) {
-		err = offload_open(p);
-		if (err)
+		if (is_offload(sc) && !ofld_disable && offload_open(p))
 			log(LOG_WARNING,
 			    "Could not initialize offload capabilities\n");
 	}
 
-	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
+	PORT_LOCK(p);
 	t3_port_intr_enable(sc, p->port_id);
+	if (!mac->multiport) 
+		t3_mac_reset(mac);
+	cxgb_update_mac_settings(p);
+	t3_link_start(&p->phy, mac, &p->link_config);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list