svn commit: r205720 - in head/sys: dev/ixgbe modules/ixgbe

Jack F Vogel jfv at FreeBSD.org
Sat Mar 27 00:21:40 UTC 2010


Author: jfv
Date: Sat Mar 27 00:21:40 2010
New Revision: 205720
URL: http://svn.freebsd.org/changeset/base/205720

Log:
  Update the driver to Intel version 2.1.6
  	- add some new hardware support for 82599
  	- Big change to interrupt architecture, it now
  	  uses a queue which contains an RX/TX pair as
  	  the recipient of the interrupt. This will reduce
  	  overall system interrupts/msix usage.
  	- Improved RX mbuf handling: the old get_buf routine
  	  is no longer synchronized with rxeof, this allows
  	  the elimination of packet discards due to mbuf
  	  allocation failure.
  	- Much simplified and improved AIM code, it now
  	  happens in the queue interrupt context and takes
  	  into account both the traffic on the RX AND TX
  	  side.
	- variety of small tweaks, like ring size, that have
	  been shown to improve performance.
	- Thanks to those who provided feedback or suggested
	  changes; I hope I've caught all of them.

Modified:
  head/sys/dev/ixgbe/LICENSE
  head/sys/dev/ixgbe/ixgbe.c
  head/sys/dev/ixgbe/ixgbe.h
  head/sys/dev/ixgbe/ixgbe_82598.c
  head/sys/dev/ixgbe/ixgbe_82599.c
  head/sys/dev/ixgbe/ixgbe_api.c
  head/sys/dev/ixgbe/ixgbe_api.h
  head/sys/dev/ixgbe/ixgbe_common.c
  head/sys/dev/ixgbe/ixgbe_phy.c
  head/sys/dev/ixgbe/ixgbe_phy.h
  head/sys/dev/ixgbe/ixgbe_type.h
  head/sys/modules/ixgbe/Makefile

Modified: head/sys/dev/ixgbe/LICENSE
==============================================================================
--- head/sys/dev/ixgbe/LICENSE	Fri Mar 26 23:44:51 2010	(r205719)
+++ head/sys/dev/ixgbe/LICENSE	Sat Mar 27 00:21:40 2010	(r205720)
@@ -1,6 +1,6 @@
 /******************************************************************************
 
-  Copyright (c) 2001-2009, Intel Corporation 
+  Copyright (c) 2001-2010, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 

Modified: head/sys/dev/ixgbe/ixgbe.c
==============================================================================
--- head/sys/dev/ixgbe/ixgbe.c	Fri Mar 26 23:44:51 2010	(r205719)
+++ head/sys/dev/ixgbe/ixgbe.c	Sat Mar 27 00:21:40 2010	(r205720)
@@ -1,6 +1,6 @@
 /******************************************************************************
 
-  Copyright (c) 2001-2009, Intel Corporation 
+  Copyright (c) 2001-2010, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
@@ -46,7 +46,7 @@ int             ixgbe_display_debug_stat
 /*********************************************************************
  *  Driver version
  *********************************************************************/
-char ixgbe_driver_version[] = "2.0.7";
+char ixgbe_driver_version[] = "2.1.6";
 
 /*********************************************************************
  *  PCI Device ID Table
@@ -76,6 +76,7 @@ static ixgbe_vendor_info_t ixgbe_vendor_
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
+	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
 	/* required last entry */
 	{0, 0, 0, 0, 0}
@@ -136,12 +137,11 @@ static void     ixgbe_free_receive_struc
 static void     ixgbe_free_receive_buffers(struct rx_ring *);
 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
 
-static void	ixgbe_init_moderation(struct adapter *);
 static void     ixgbe_enable_intr(struct adapter *);
 static void     ixgbe_disable_intr(struct adapter *);
 static void     ixgbe_update_stats_counters(struct adapter *);
 static bool	ixgbe_txeof(struct tx_ring *);
-static bool	ixgbe_rxeof(struct rx_ring *, int);
+static bool	ixgbe_rxeof(struct ix_queue *, int);
 static void	ixgbe_rx_checksum(u32, struct mbuf *);
 static void     ixgbe_set_promisc(struct adapter *);
 static void     ixgbe_disable_promisc(struct adapter *);
@@ -149,7 +149,7 @@ static void     ixgbe_set_multi(struct a
 static void     ixgbe_print_hw_stats(struct adapter *);
 static void	ixgbe_print_debug_info(struct adapter *);
 static void     ixgbe_update_link_status(struct adapter *);
-static int	ixgbe_get_buf(struct rx_ring *, int, int);
+static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
 static int      ixgbe_sysctl_stats(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_debug(SYSCTL_HANDLER_ARGS);
@@ -169,7 +169,9 @@ static void	ixgbe_setup_vlan_hw_support(
 static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
 static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
 
-static void	ixgbe_update_aim(struct rx_ring *);
+static __inline void ixgbe_rx_discard(struct rx_ring *, int);
+static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
+		    struct mbuf *, u32);
 
 /* Support for pluggable optic modules */
 static bool	ixgbe_sfp_probe(struct adapter *);
@@ -178,13 +180,11 @@ static bool	ixgbe_sfp_probe(struct adapt
 static void	ixgbe_legacy_irq(void *);
 
 /* The MSI/X Interrupt handlers */
-static void	ixgbe_msix_tx(void *);
-static void	ixgbe_msix_rx(void *);
+static void	ixgbe_msix_que(void *);
 static void	ixgbe_msix_link(void *);
 
 /* Deferred interrupt tasklets */
-static void	ixgbe_handle_tx(void *, int);
-static void	ixgbe_handle_rx(void *, int);
+static void	ixgbe_handle_que(void *, int);
 static void	ixgbe_handle_link(void *, int);
 static void	ixgbe_handle_msf(void *, int);
 static void	ixgbe_handle_mod(void *, int);
@@ -222,23 +222,16 @@ MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
 */
 
 /*
-** These  parameters are used in Adaptive 
-** Interrupt Moderation. The value is set
-** into EITR and controls the interrupt
-** frequency. They can be modified but 
-** be careful in tuning them.
+** AIM: Adaptive Interrupt Moderation
+** which means that the interrupt rate
+** is varied over time based on the
+** traffic for that interrupt vector
 */
 static int ixgbe_enable_aim = TRUE;
 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
-static int ixgbe_low_latency = IXGBE_LOW_LATENCY;
-TUNABLE_INT("hw.ixgbe.low_latency", &ixgbe_low_latency);
-static int ixgbe_ave_latency = IXGBE_AVE_LATENCY;
-TUNABLE_INT("hw.ixgbe.ave_latency", &ixgbe_ave_latency);
-static int ixgbe_bulk_latency = IXGBE_BULK_LATENCY;
-TUNABLE_INT("hw.ixgbe.bulk_latency", &ixgbe_bulk_latency);
 
 /* How many packets rxeof tries to clean at a time */
-static int ixgbe_rx_process_limit = 100;
+static int ixgbe_rx_process_limit = 128;
 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
 
 /* Flow control setting, default to full */
@@ -271,20 +264,24 @@ static bool ixgbe_header_split = TRUE;
 TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);
 
 /*
- * Number of Queues, should normally
- * be left at 0, it then autoconfigures to
- * the number of cpus. Each queue is a pair
- * of RX and TX rings with a dedicated interrupt
+ * Number of Queues, can be set to 0,
+ * it then autoconfigures based on the
+ * number of cpus. Each queue is a pair
+ * of RX and TX rings with a msix vector
  */
 static int ixgbe_num_queues = 0;
 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
 
-/* Number of TX descriptors per ring */
-static int ixgbe_txd = DEFAULT_TXD;
+/*
+** Number of TX descriptors per ring,
+** setting higher than RX as this seems
+** the better performing choice.
+*/
+static int ixgbe_txd = PERFORM_TXD;
 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
 
 /* Number of RX descriptors per ring */
-static int ixgbe_rxd = DEFAULT_RXD;
+static int ixgbe_rxd = PERFORM_RXD;
 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
 
 /* Keep running tab on them for sanity check */
@@ -420,9 +417,11 @@ ixgbe_attach(device_t dev)
 		case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM :
 		case IXGBE_DEV_ID_82598EB_SFP_LOM :
 		case IXGBE_DEV_ID_82598AT :
-		case IXGBE_DEV_ID_82598AT2 :
 			adapter->optics = IFM_10G_SR;
 			break;
+		case IXGBE_DEV_ID_82598AT2 :
+			adapter->optics = IFM_10G_T;
+			break;
 		case IXGBE_DEV_ID_82598EB_XF_LR :
 			adapter->optics = IFM_10G_LR;
 			break;
@@ -439,6 +438,10 @@ ixgbe_attach(device_t dev)
 		case IXGBE_DEV_ID_82599_XAUI_LOM :
 		case IXGBE_DEV_ID_82599_COMBO_BACKPLANE :
 			ixgbe_num_segs = IXGBE_82599_SCATTER;
+			break;
+		case IXGBE_DEV_ID_82599_T3_LOM:
+			ixgbe_num_segs = IXGBE_82599_SCATTER;
+			adapter->optics = IFM_10G_T;
 		default:
 			break;
 	}
@@ -464,21 +467,6 @@ ixgbe_attach(device_t dev)
 			OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
 			&ixgbe_enable_aim, 1, "Interrupt Moderation");
 
-        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
-			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
-			OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
-			&ixgbe_low_latency, 1, "Low Latency");
-
-        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
-			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
-			OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
-			&ixgbe_ave_latency, 1, "Average Latency");
-
-        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
-			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
-			OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
-			&ixgbe_bulk_latency, 1, "Bulk Latency");
-
 	/* Set up the timer callout */
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
@@ -592,22 +580,6 @@ ixgbe_attach(device_t dev)
 	/* Setup OS specific network interface */
 	ixgbe_setup_interface(dev, adapter);
 
-#ifdef IXGBE_IEEE1588
-	/*
-	** Setup the timer: IEEE 1588 support
-	*/
-	adapter->cycles.read = ixgbe_read_clock;
-	adapter->cycles.mask = (u64)-1;
-	adapter->cycles.mult = 1;
-	adapter->cycles.shift = IXGBE_TSYNC_SHIFT;
-	IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (1<<24) |
-	    IXGBE_TSYNC_CYCLE_TIME * IXGBE_TSYNC_SHIFT);
-	IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000);
-	IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0xFF800000);
-
-        // JFV - this is not complete yet
-#endif
-
 	/* Sysctl for limiting the amount of work done in the taskqueue */
 	ixgbe_add_rx_process_limit(adapter, "rx_processing_limit",
 	    "max number of rx packets to process", &adapter->rx_process_limit,
@@ -632,12 +604,13 @@ ixgbe_attach(device_t dev)
 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
 	    ("Unknown"));
 
-	if (hw->bus.width <= ixgbe_bus_width_pcie_x4) {
+	if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
+	    (hw->bus.speed == ixgbe_bus_speed_2500)) {
 		device_printf(dev, "PCI-Express bandwidth available"
 		    " for this card\n     is not sufficient for"
 		    " optimal performance.\n");
 		device_printf(dev, "For optimal performance a x8 "
-		    "PCI-Express slot is required.\n");
+		    "PCIE, or x4 PCIE 2 slot is required.\n");
         }
 
 	/* let hardware know driver is loaded */
@@ -670,8 +643,7 @@ static int
 ixgbe_detach(device_t dev)
 {
 	struct adapter *adapter = device_get_softc(dev);
-	struct tx_ring *txr = adapter->tx_rings;
-	struct rx_ring *rxr = adapter->rx_rings;
+	struct ix_queue *que = adapter->queues;
 	u32	ctrl_ext;
 
 	INIT_DEBUGOUT("ixgbe_detach: begin");
@@ -686,17 +658,10 @@ ixgbe_detach(device_t dev)
 	ixgbe_stop(adapter);
 	IXGBE_CORE_UNLOCK(adapter);
 
-	for (int i = 0; i < adapter->num_queues; i++, txr++) {
-		if (txr->tq) {
-			taskqueue_drain(txr->tq, &txr->tx_task);
-			taskqueue_free(txr->tq);
-		}
-	}
-
-	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
-		if (rxr->tq) {
-			taskqueue_drain(rxr->tq, &rxr->rx_task);
-			taskqueue_free(rxr->tq);
+	for (int i = 0; i < adapter->num_queues; i++, que++) {
+		if (que->tq) {
+			taskqueue_drain(que->tq, &que->que_task);
+			taskqueue_free(que->tq);
 		}
 	}
 
@@ -833,6 +798,9 @@ ixgbe_mq_start(struct ifnet *ifp, struct
 	/* Which queue to use */
 	if ((m->m_flags & M_FLOWID) != 0)
 		i = m->m_pkthdr.flowid % adapter->num_queues;
+	else	/* use the cpu we're on */
+		i = curcpu % adapter->num_queues;
+
 	txr = &adapter->tx_rings[i];
 
 	if (IXGBE_TX_TRYLOCK(txr)) {
@@ -849,59 +817,43 @@ ixgbe_mq_start_locked(struct ifnet *ifp,
 {
 	struct adapter  *adapter = txr->adapter;
         struct mbuf     *next;
-        int             err = 0;
+        int             enqueued, err = 0;
 
-	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
-	    (!adapter->link_active)) {
-		err = drbr_enqueue(ifp, txr->br, m);
+	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+	    IFF_DRV_RUNNING || adapter->link_active == 0) {
+		if (m != NULL)
+			err = drbr_enqueue(ifp, txr->br, m);
 		return (err);
 	}
 
-	if (m == NULL) /* Called by tasklet */
-		goto process;
-
-	/* If nothing queued go right to xmit */
-	if (!drbr_needs_enqueue(ifp, txr->br)) {
-		if ((err = ixgbe_xmit(txr, &m)) != 0) {
-			if (m != NULL)
-				err = drbr_enqueue(ifp, txr->br, m);
-			return (err);
-		} else {
-			/* Success, update stats */
-			drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
-			/* Send a copy of the frame to the BPF listener */
-			ETHER_BPF_MTAP(ifp, m);
-			/* Set the watchdog */
-			txr->watchdog_check = TRUE;
-                }
-
-        } else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
-		return (err);
-
-process:
-	if (drbr_empty(ifp, txr->br))
-		return (err);
+	enqueued = 0;
+	if (m == NULL)
+		next = drbr_dequeue(ifp, txr->br);
+	else
+		next = m;
 
 	/* Process the queue */
-	while (TRUE) {
-		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
-			break;
-		next = drbr_dequeue(ifp, txr->br);
-		if (next == NULL)
-			break;
+	while (next != NULL) {
 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
 			if (next != NULL)
 				err = drbr_enqueue(ifp, txr->br, next);
 			break;
 		}
+		enqueued++;
 		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
+		/* Send a copy of the frame to the BPF listener */
 		ETHER_BPF_MTAP(ifp, next);
-		/* Set the watchdog */
-		txr->watchdog_check = TRUE;
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+			break;
+		if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD) {
+			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+			break;
+		}
+		next = drbr_dequeue(ifp, txr->br);
 	}
-		
-	if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
-		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+
+	if (enqueued > 0) 
+		txr->watchdog_check = TRUE;
 
 	return (err);
 }
@@ -938,8 +890,8 @@ ixgbe_qflush(struct ifnet *ifp)
 static int
 ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
 {
-	struct adapter *adapter = ifp->if_softc;
-	struct ifreq   *ifr = (struct ifreq *) data;
+	struct adapter	*adapter = ifp->if_softc;
+	struct ifreq	*ifr = (struct ifreq *) data;
 	int             error = 0;
 
 	switch (command) {
@@ -999,8 +951,7 @@ ixgbe_ioctl(struct ifnet * ifp, u_long c
 			ifp->if_capenable ^= IFCAP_HWCSUM;
 		if (mask & IFCAP_TSO4)
 			ifp->if_capenable ^= IFCAP_TSO4;
-		/* Only allow changing when using header split */
-		if ((mask & IFCAP_LRO) && (ixgbe_header_split))
+		if (mask & IFCAP_LRO)
 			ifp->if_capenable ^= IFCAP_LRO;
 		if (mask & IFCAP_VLAN_HWTAGGING)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
@@ -1010,15 +961,6 @@ ixgbe_ioctl(struct ifnet * ifp, u_long c
 		break;
 	}
 
-#ifdef IXGBE_IEEE1588
-	/*
-	** IOCTL support for Precision Time (IEEE 1588) Support
-	*/
-	case SIOCSHWTSTAMP:
-		error = ixgbe_hwtstamp_ioctl(adapter, ifp);
-		break;
-#endif
-
 	default:
 		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
 		error = ether_ioctl(ifp, command, data);
@@ -1045,15 +987,20 @@ ixgbe_init_locked(struct adapter *adapte
 {
 	struct ifnet   *ifp = adapter->ifp;
 	device_t 	dev = adapter->dev;
-	struct ixgbe_hw *hw;
+	struct ixgbe_hw *hw = &adapter->hw;
 	u32		k, txdctl, mhadd, gpie;
 	u32		rxdctl, rxctrl;
 	int		err;
 
+	mtx_assert(&adapter->core_mtx, MA_OWNED);
 	INIT_DEBUGOUT("ixgbe_init: begin");
+	ixgbe_reset_hw(hw);
+	hw->adapter_stopped = FALSE;
+	ixgbe_stop_adapter(hw);
+        callout_stop(&adapter->timer);
 
-	hw = &adapter->hw;
-	mtx_assert(&adapter->core_mtx, MA_OWNED);
+        /* reprogram the RAR[0] in case user changed it. */
+        ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
 
 	/* Get the latest mac address, User can use a LAA */
 	bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
@@ -1061,9 +1008,6 @@ ixgbe_init_locked(struct adapter *adapte
 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
 	hw->addr_ctrl.rar_used_count = 1;
 
-	/* Do a warm reset */
-	ixgbe_reset_hw(hw);
-
 	/* Prepare transmit descriptors and buffers */
 	if (ixgbe_setup_transmit_structures(adapter)) {
 		device_printf(dev,"Could not setup transmit structures\n");
@@ -1071,6 +1015,7 @@ ixgbe_init_locked(struct adapter *adapte
 		return;
 	}
 
+	ixgbe_init_hw(hw);
 	ixgbe_initialize_transmit_units(adapter);
 
 	/* Setup Multicast table */
@@ -1095,9 +1040,6 @@ ixgbe_init_locked(struct adapter *adapte
 	/* Configure RX settings */
 	ixgbe_initialize_receive_units(adapter);
 
-	/* Configure Interrupt Moderation */
-	ixgbe_init_moderation(adapter);
-
 	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
 
 	if (hw->mac.type == ixgbe_mac_82599EB) {
@@ -1174,7 +1116,7 @@ ixgbe_init_locked(struct adapter *adapte
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
 	rxctrl |= IXGBE_RXCTRL_RXEN;
-	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl);
+	ixgbe_enable_rx_dma(hw, rxctrl);
 
 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
 
@@ -1291,34 +1233,22 @@ ixgbe_rearm_queues(struct adapter *adapt
 	}
 }
 
-static void
-ixgbe_handle_rx(void *context, int pending)
-{
-	struct rx_ring  *rxr = context;
-	struct adapter  *adapter = rxr->adapter;
-	u32		loop = MAX_LOOP;
-	bool		more;
-
-	do {
-		more = ixgbe_rxeof(rxr, -1);
-	} while (loop-- && more);
-        /* Reenable this interrupt */
-	ixgbe_enable_queue(adapter, rxr->msix);
-}
 
 static void
-ixgbe_handle_tx(void *context, int pending)
+ixgbe_handle_que(void *context, int pending)
 {
-	struct tx_ring  *txr = context;
-	struct adapter  *adapter = txr->adapter;
+	struct ix_queue *que = context;
+	struct adapter  *adapter = que->adapter;
+	struct tx_ring  *txr = que->txr;
 	struct ifnet    *ifp = adapter->ifp;
 	u32		loop = MAX_LOOP;
-	bool		more;
+	bool		more_rx, more_tx;
 
 	IXGBE_TX_LOCK(txr);
 	do {
-		more = ixgbe_txeof(txr);
-	} while (loop-- && more);
+		more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
+		more_tx = ixgbe_txeof(txr);
+	} while (loop-- && (more_rx || more_tx));
 
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 #if __FreeBSD_version >= 800000
@@ -1332,7 +1262,7 @@ ixgbe_handle_tx(void *context, int pendi
 
 	IXGBE_TX_UNLOCK(txr);
 	/* Reenable this interrupt */
-	ixgbe_enable_queue(adapter, txr->msix);
+	ixgbe_enable_queue(adapter, que->msix);
 }
 
 
@@ -1345,33 +1275,32 @@ ixgbe_handle_tx(void *context, int pendi
 static void
 ixgbe_legacy_irq(void *arg)
 {
-	struct adapter	*adapter = arg;
+	struct ix_queue *que = arg;
+	struct adapter	*adapter = que->adapter;
 	struct ixgbe_hw	*hw = &adapter->hw;
 	struct 		tx_ring *txr = adapter->tx_rings;
-	struct		rx_ring *rxr = adapter->rx_rings;
-	bool		more;
+	bool		more_tx, more_rx;
 	u32       	reg_eicr, loop = MAX_LOOP;
 
 
 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
 
+	++que->irqs;
 	if (reg_eicr == 0) {
 		ixgbe_enable_intr(adapter);
 		return;
 	}
 
-	if (ixgbe_rxeof(rxr, adapter->rx_process_limit))
-		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
+	more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
 
 	IXGBE_TX_LOCK(txr);
-	++txr->tx_irq;
 	do {
-		more = ixgbe_txeof(txr);
-	} while (loop-- && more);
+		more_tx = ixgbe_txeof(txr);
+	} while (loop-- && more_tx);
 	IXGBE_TX_UNLOCK(txr);
 
-	if (more)
-		taskqueue_enqueue(txr->tq, &txr->tx_task);
+	if (more_rx || more_tx)
+		taskqueue_enqueue(que->tq, &que->que_task);
 
 	/* Check for fan failure */
 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
@@ -1382,15 +1311,8 @@ ixgbe_legacy_irq(void *arg)
 	}
 
 	/* Link status change */
-	if (reg_eicr & IXGBE_EICR_LSC) {
-		ixgbe_check_link(&adapter->hw,
-		    &adapter->link_speed, &adapter->link_up, 0);
-        	ixgbe_update_link_status(adapter);
-	}
-
-	/* Update interrupt rate */
-	if (ixgbe_enable_aim == TRUE)
-		ixgbe_update_aim(rxr);
+	if (reg_eicr & IXGBE_EICR_LSC)
+		taskqueue_enqueue(adapter->tq, &adapter->link_task);
 
 	ixgbe_enable_intr(adapter);
 	return;
@@ -1399,55 +1321,85 @@ ixgbe_legacy_irq(void *arg)
 
 /*********************************************************************
  *
- *  MSI TX Interrupt Service routine
+ *  MSI Queue Interrupt Service routine
  *
  **********************************************************************/
 void
-ixgbe_msix_tx(void *arg)
+ixgbe_msix_que(void *arg)
 {
-	struct tx_ring	*txr = arg;
-	struct adapter  *adapter = txr->adapter;
-	bool		more;
+	struct ix_queue	*que = arg;
+	struct adapter  *adapter = que->adapter;
+	struct tx_ring	*txr = que->txr;
+	struct rx_ring	*rxr = que->rxr;
+	bool		more_tx, more_rx;
+	u32		newitr = 0;
 
-	ixgbe_disable_queue(adapter, txr->msix);
+	ixgbe_disable_queue(adapter, que->msix);
+	++que->irqs;
+
+	more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
 
 	IXGBE_TX_LOCK(txr);
-	++txr->tx_irq;
-	more = ixgbe_txeof(txr);
+	more_tx = ixgbe_txeof(txr);
 	IXGBE_TX_UNLOCK(txr);
-	if (more)
-		taskqueue_enqueue(txr->tq, &txr->tx_task);
-	else /* Reenable this interrupt */
-		ixgbe_enable_queue(adapter, txr->msix);
-	return;
-}
 
+	more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
 
-/*********************************************************************
- *
- *  MSIX RX Interrupt Service routine
- *
- **********************************************************************/
-static void
-ixgbe_msix_rx(void *arg)
-{
-	struct rx_ring	*rxr = arg;
-	struct adapter  *adapter = rxr->adapter;
-	bool		more;
-
-	ixgbe_disable_queue(adapter, rxr->msix);
+	/* Do AIM now? */
 
-	++rxr->rx_irq;
-	more = ixgbe_rxeof(rxr, adapter->rx_process_limit);
-
-	/* Update interrupt rate */
-	if (ixgbe_enable_aim == TRUE)
-		ixgbe_update_aim(rxr);
+	if (ixgbe_enable_aim == FALSE)
+		goto no_calc;
+	/*
+	** Do Adaptive Interrupt Moderation:
+        **  - Write out last calculated setting
+	**  - Calculate based on average size over
+	**    the last interval.
+	*/
+        if (que->eitr_setting)
+                IXGBE_WRITE_REG(&adapter->hw,
+                    IXGBE_EITR(que->msix), que->eitr_setting);
+ 
+        que->eitr_setting = 0;
 
-	if (more)
-		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
+        /* Idle, do nothing */
+        if ((txr->bytes == 0) && (rxr->bytes == 0))
+                goto no_calc;
+                                
+	if ((txr->bytes) && (txr->packets))
+               	newitr = txr->bytes/txr->packets;
+	if ((rxr->bytes) && (rxr->packets))
+		newitr = max(newitr,
+		    (rxr->bytes / rxr->packets));
+	newitr += 24; /* account for hardware frame, crc */
+
+	/* set an upper boundary */
+	newitr = min(newitr, 3000);
+
+	/* Be nice to the mid range */
+	if ((newitr > 300) && (newitr < 1200))
+		newitr = (newitr / 3);
 	else
-		ixgbe_enable_queue(adapter, rxr->msix);
+		newitr = (newitr / 2);
+
+        if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+                newitr |= newitr << 16;
+        else
+                newitr |= IXGBE_EITR_CNT_WDIS;
+                 
+        /* save for next interrupt */
+        que->eitr_setting = newitr;
+
+        /* Reset state */
+        txr->bytes = 0;
+        txr->packets = 0;
+        rxr->bytes = 0;
+        rxr->packets = 0;
+
+no_calc:
+	if (more_tx || more_rx)
+		taskqueue_enqueue(que->tq, &que->que_task);
+	else /* Reenable this interrupt */
+		ixgbe_enable_queue(adapter, que->msix);
 	return;
 }
 
@@ -1512,84 +1464,6 @@ ixgbe_msix_link(void *arg)
 	return;
 }
 
-/*
-** Routine to do adjust the RX EITR value based on traffic,
-** its a simple three state model, but seems to help.
-**
-** Note that the three EITR values are tuneable using
-** sysctl in real time. The feature can be effectively
-** nullified by setting them equal.
-*/
-#define BULK_THRESHOLD	10000
-#define AVE_THRESHOLD	1600
-
-static void
-ixgbe_update_aim(struct rx_ring *rxr)
-{
-	struct adapter  *adapter = rxr->adapter;
-	u32             olditr, newitr;
-
-	/* Update interrupt moderation based on traffic */
-	olditr = rxr->eitr_setting;
-	newitr = olditr;
-
-	/* Idle, don't change setting */
-	if (rxr->bytes == 0)   
-		return;
-                
-	if (olditr == ixgbe_low_latency) {
-		if (rxr->bytes > AVE_THRESHOLD)
-			newitr = ixgbe_ave_latency;
-	} else if (olditr == ixgbe_ave_latency) {
-		if (rxr->bytes < AVE_THRESHOLD)
-			newitr = ixgbe_low_latency;
-		else if (rxr->bytes > BULK_THRESHOLD)
-			newitr = ixgbe_bulk_latency;
-	} else if (olditr == ixgbe_bulk_latency) {
-		if (rxr->bytes < BULK_THRESHOLD)
-			newitr = ixgbe_ave_latency;
-	}
-
-	if (olditr != newitr) {
-		/* Change interrupt rate */
-		rxr->eitr_setting = newitr;
-		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(rxr->me),
-		    newitr | (newitr << 16));
-	}
-
-	rxr->bytes = 0;
-	return;
-}
-
-static void
-ixgbe_init_moderation(struct adapter *adapter)
-{
-	struct rx_ring *rxr = adapter->rx_rings;
-	struct tx_ring *txr = adapter->tx_rings;
-
-	/* Single interrupt - MSI or Legacy? */
-	if (adapter->msix < 2) {
-		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(0), 100);
-		return;
-	}
-
-	/* TX irq moderation rate is fixed */
-	for (int i = 0; i < adapter->num_queues; i++, txr++)
-		IXGBE_WRITE_REG(&adapter->hw,
-		    IXGBE_EITR(txr->msix), ixgbe_ave_latency);
-
-	/* RX moderation will be adapted over time, set default */
-	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
-		IXGBE_WRITE_REG(&adapter->hw,
-		    IXGBE_EITR(rxr->msix), ixgbe_low_latency);
-	}
-
-	/* Set Link moderation */
-	IXGBE_WRITE_REG(&adapter->hw,
-	    IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
-
-}
-
 /*********************************************************************
  *
  *  Media Ioctl callback
@@ -1665,11 +1539,10 @@ ixgbe_media_change(struct ifnet * ifp)
 
 /*********************************************************************
  *
- *  This routine maps the mbufs to tx descriptors.
- *    WARNING: while this code is using an MQ style infrastructure,
- *    it would NOT work as is with more than 1 queue.
+ *  This routine maps the mbufs to tx descriptors, allowing the
+ *  TX engine to transmit the packets. 
+ *  	- return 0 on success, positive on failure
  *
- *  return 0 on success, positive on failure
  **********************************************************************/
 
 static int
@@ -1695,14 +1568,6 @@ ixgbe_xmit(struct tx_ring *txr, struct m
 	if (m_head->m_flags & M_VLANTAG)
         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
 
-	/* Do a clean if descriptors are low */
-	if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
-		ixgbe_txeof(txr);
-		/* Now do we at least have a minimal? */
-		if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
-			return (ENOBUFS);
-        }
-
         /*
          * Important to capture the first descriptor
          * used because it will contain the index of
@@ -1756,7 +1621,7 @@ ixgbe_xmit(struct tx_ring *txr, struct m
 
 	/* Make certain there are enough descriptors */
 	if (nsegs > txr->tx_avail - 2) {
-		txr->no_tx_desc_avail++;
+		txr->no_desc_avail++;
 		error = ENOBUFS;
 		goto xmit_fail;
 	}
@@ -1814,7 +1679,7 @@ ixgbe_xmit(struct tx_ring *txr, struct m
 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
 		    cmd_type_len |seglen);
 		txd->read.olinfo_status = htole32(olinfo_status);
-		last = i; /* Next descriptor that will get completed */
+		last = i; /* descriptor that will get completion IRQ */
 
 		if (++i == adapter->num_tx_desc)
 			i = 0;
@@ -1843,7 +1708,13 @@ ixgbe_xmit(struct tx_ring *txr, struct m
 	 * hardware that this frame is available to transmit.
 	 */
 	++txr->total_packets;
+	txr->watchdog_time = ticks;
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
+
+	/* Do a clean if descriptors are low */
+	if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD)
+		ixgbe_txeof(txr);
+
 	return (0);
 
 xmit_fail:
@@ -1978,7 +1849,6 @@ ixgbe_local_timer(void *arg)
 	struct ifnet   *ifp = adapter->ifp;
 	device_t	dev = adapter->dev;
 	struct tx_ring *txr = adapter->tx_rings;
-	bool   tx_hung = FALSE;
 
 	mtx_assert(&adapter->core_mtx, MA_OWNED);
 
@@ -1989,21 +1859,32 @@ ixgbe_local_timer(void *arg)
 
 	ixgbe_update_link_status(adapter);
 	ixgbe_update_stats_counters(adapter);
-	if (ixgbe_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING) {
+
+	/* Debug display */
+	if (ixgbe_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
 		ixgbe_print_hw_stats(adapter);
-	}
+
+	/*
+	 * If the interface has been paused
+	 * then don't do the watchdog check
+	 */
+	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
+		goto out;
 	/*
 	** Check for time since any descriptor was cleaned
 	*/
         for (int i = 0; i < adapter->num_queues; i++, txr++) {
-		if (txr->watchdog_check == FALSE)
+		IXGBE_TX_LOCK(txr);
+		if (txr->watchdog_check == FALSE) {
+			IXGBE_TX_UNLOCK(txr);
 			continue;
-		if ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG) {
-			tx_hung = TRUE;
-			goto hung;
 		}
+		if ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG)
+			goto hung;
+		IXGBE_TX_UNLOCK(txr);
 	}
 out:
+       	ixgbe_rearm_queues(adapter, adapter->que_mask);
 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
 	return;
 
@@ -2017,6 +1898,7 @@ hung:
 	    txr->me, txr->tx_avail, txr->next_to_clean);
 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	adapter->watchdog_events++;
+	IXGBE_TX_UNLOCK(txr);
 	ixgbe_init_locked(adapter);
 }
 
@@ -2123,8 +2005,7 @@ static int
 ixgbe_allocate_legacy(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
-	struct 		tx_ring *txr = adapter->tx_rings;
-	struct		rx_ring *rxr = adapter->rx_rings;
+	struct		ix_queue *que = adapter->queues;
 	int error, rid = 0;
 
 	/* MSI RID at 1 */
@@ -2144,15 +2025,10 @@ ixgbe_allocate_legacy(struct adapter *ad
 	 * Try allocating a fast interrupt and the associated deferred
 	 * processing contexts.
 	 */
-	TASK_INIT(&txr->tx_task, 0, ixgbe_handle_tx, txr);
-	TASK_INIT(&rxr->rx_task, 0, ixgbe_handle_rx, rxr);
-	txr->tq = taskqueue_create_fast("ixgbe_txq", M_NOWAIT,
-            taskqueue_thread_enqueue, &txr->tq);
-	rxr->tq = taskqueue_create_fast("ixgbe_rxq", M_NOWAIT,
-            taskqueue_thread_enqueue, &rxr->tq);
-	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
-            device_get_nameunit(adapter->dev));
-	taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
+	TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
+	que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
+            taskqueue_thread_enqueue, &que->tq);
+	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
             device_get_nameunit(adapter->dev));
 
 	/* Tasklets for Link, SFP and Multispeed Fiber */
@@ -2169,15 +2045,17 @@ ixgbe_allocate_legacy(struct adapter *ad
 
 	if ((error = bus_setup_intr(dev, adapter->res,
             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
-            adapter, &adapter->tag)) != 0) {
+            que, &adapter->tag)) != 0) {
 		device_printf(dev, "Failed to register fast interrupt "
 		    "handler: %d\n", error);
-		taskqueue_free(txr->tq);
-		taskqueue_free(rxr->tq);
-		txr->tq = NULL;
-		rxr->tq = NULL;
+		taskqueue_free(que->tq);
+		taskqueue_free(adapter->tq);
+		que->tq = NULL;
+		adapter->tq = NULL;
 		return (error);
 	}
+	/* For simplicity in the handlers */
+	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
 
 	return (0);
 }
@@ -2192,83 +2070,44 @@ static int
 ixgbe_allocate_msix(struct adapter *adapter)
 {
 	device_t        dev = adapter->dev;
-	struct 		tx_ring *txr = adapter->tx_rings;
-	struct		rx_ring *rxr = adapter->rx_rings;
+	struct 		ix_queue *que = adapter->queues;
 	int 		error, rid, vector = 0;
 
-	/* TX setup: the code is here for multi tx,
-	   there are other parts of the driver not ready for it */
-	for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
+	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
 		rid = vector + 1;
-		txr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
+		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 		    RF_SHAREABLE | RF_ACTIVE);
-		if (!txr->res) {
+		if (que->res == NULL) {
 			device_printf(dev,"Unable to allocate"
-		    	    " bus resource: tx interrupt [%d]\n", vector);
+		    	    " bus resource: que interrupt [%d]\n", vector);
 			return (ENXIO);
 		}
 		/* Set the handler function */
-		error = bus_setup_intr(dev, txr->res,
+		error = bus_setup_intr(dev, que->res,
 		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
-		    ixgbe_msix_tx, txr, &txr->tag);
+		    ixgbe_msix_que, que, &que->tag);
 		if (error) {
-			txr->res = NULL;
-			device_printf(dev, "Failed to register TX handler");
+			que->res = NULL;
+			device_printf(dev, "Failed to register QUE handler");
 			return (error);
 		}
-		txr->msix = vector;
+		que->msix = vector;
+        	adapter->que_mask |= (u64)(1 << que->msix);
 		/*
 		** Bind the msix vector, and thus the
 		** ring to the corresponding cpu.
 		*/
 		if (adapter->num_queues > 1)
-			bus_bind_intr(dev, txr->res, i);
+			bus_bind_intr(dev, que->res, i);
 
-		TASK_INIT(&txr->tx_task, 0, ixgbe_handle_tx, txr);
-		txr->tq = taskqueue_create_fast("ixgbe_txq", M_NOWAIT,
-		    taskqueue_thread_enqueue, &txr->tq);
-		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
+		TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
+		que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list