svn commit: r253691 - in head: sys/conf sys/dev/cxgbe sys/dev/cxgbe/common sys/modules/cxgbe/if_cxgbe tools/tools/cxgbetool

Navdeep Parhar np at FreeBSD.org
Fri Jul 26 22:04:13 UTC 2013


Author: np
Date: Fri Jul 26 22:04:11 2013
New Revision: 253691
URL: http://svnweb.freebsd.org/changeset/base/253691

Log:
  Add support for packet-sniffing tracers to cxgbe(4).  This works with
  all T4 and T5 based cards and is useful for analyzing TSO, LRO, TOE, and
  for general purpose monitoring without tapping any cxgbe or cxl ifnet
  directly.
  
  Tracers on the T4/T5 chips provide access to Ethernet frames exactly as
  they were received from or transmitted on the wire.  On transmit, a
  tracer will capture a frame after TSO segmentation, hw VLAN tag
  insertion, hw L3 & L4 checksum insertion, etc.  It will also capture
  frames generated by the TCP offload engine (TOE traffic is normally
  invisible to the kernel).  On receive, a tracer will capture a frame
  before hw VLAN extraction, runt filtering, other badness filtering,
  before the steering/drop/L2-rewrite filters or the TOE have had a go at
  it, and of course before sw LRO in the driver.
  
  There are 4 tracers on a chip.  A tracer can trace only in one direction
  (tx or rx).  For now cxgbetool will set up tracers to capture the first
  128B of every transmitted or received frame on a given port.  This is a
  small subset of what the hardware can do.  A pseudo ifnet with the same
  name as the nexus driver (t4nex0 or t5nex0) will be created for tracing.
  The data delivered to this ifnet is an additional copy made inside the
  chip.  Normal delivery to cxgbe<n> or cxl<n> will be made as usual.
  
  /* watch cxl0, which is the first port hanging off t5nex0. */
  # cxgbetool t5nex0 tracer 0 tx0  (watch what cxl0 is transmitting)
  # cxgbetool t5nex0 tracer 1 rx0  (watch what cxl0 is receiving)
  # cxgbetool t5nex0 tracer list
  # tcpdump -i t5nex0   <== all that cxl0 sees and puts on the wire
  
  If you were doing TSO, a tcpdump on cxl0 may have shown you ~64K
  "frames" with no L3/L4 checksum but this will show you the frames that
  were actually transmitted.
  
  /* all done */
  # cxgbetool t5nex0 tracer 0 disable
  # cxgbetool t5nex0 tracer 1 disable
  # cxgbetool t5nex0 tracer list
  # ifconfig t5nex0 destroy

Added:
  head/sys/dev/cxgbe/t4_tracer.c   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/dev/cxgbe/adapter.h
  head/sys/dev/cxgbe/common/t4_hw.c
  head/sys/dev/cxgbe/common/t4_hw.h
  head/sys/dev/cxgbe/t4_ioctl.h
  head/sys/dev/cxgbe/t4_main.c
  head/sys/dev/cxgbe/t4_sge.c
  head/sys/modules/cxgbe/if_cxgbe/Makefile
  head/tools/tools/cxgbetool/cxgbetool.c

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Fri Jul 26 21:34:09 2013	(r253690)
+++ head/sys/conf/files	Fri Jul 26 22:04:11 2013	(r253691)
@@ -1155,6 +1155,8 @@ dev/cxgbe/t4_sge.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_l2t.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
+dev/cxgbe/t4_tracer.c		optional cxgbe pci \
+	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/common/t4_hw.c	optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 t4fw_cfg.c		optional cxgbe					\

Modified: head/sys/dev/cxgbe/adapter.h
==============================================================================
--- head/sys/dev/cxgbe/adapter.h	Fri Jul 26 21:34:09 2013	(r253690)
+++ head/sys/dev/cxgbe/adapter.h	Fri Jul 26 22:04:11 2013	(r253691)
@@ -172,6 +172,7 @@ enum {
 	DOOMED		= (1 << 0),
 	PORT_INIT_DONE	= (1 << 1),
 	PORT_SYSCTL_CTX	= (1 << 2),
+	HAS_TRACEQ	= (1 << 3),
 };
 
 #define IS_DOOMED(pi)	((pi)->flags & DOOMED)
@@ -577,6 +578,14 @@ struct adapter {
 #endif
 	int flags;
 
+	char ifp_lockname[16];
+	struct mtx ifp_lock;
+	struct ifnet *ifp;	/* tracer ifp */
+	struct ifmedia media;
+	int traceq;		/* iq used by all tracers, -1 if none */
+	int tracer_valid;	/* bitmap of valid tracers */
+	int tracer_enabled;	/* bitmap of enabled tracers */
+
 	char fw_version[32];
 	char cfg_file[32];
 	u_int cfcsum;
@@ -808,6 +817,16 @@ int t4_eth_tx(struct ifnet *, struct sge
 void t4_update_fl_bufsize(struct ifnet *);
 int can_resume_tx(struct sge_eq *);
 
+/* t4_tracer.c */
+struct t4_tracer;
+void t4_tracer_modload(void);
+void t4_tracer_modunload(void);
+void t4_tracer_port_detach(struct adapter *);
+int t4_get_tracer(struct adapter *, struct t4_tracer *);
+int t4_set_tracer(struct adapter *, struct t4_tracer *);
+int t4_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *);
+int t5_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *);
+
 static inline struct wrqe *
 alloc_wrqe(int wr_len, struct sge_wrq *wrq)
 {

Modified: head/sys/dev/cxgbe/common/t4_hw.c
==============================================================================
--- head/sys/dev/cxgbe/common/t4_hw.c	Fri Jul 26 21:34:09 2013	(r253690)
+++ head/sys/dev/cxgbe/common/t4_hw.c	Fri Jul 26 22:04:11 2013	(r253691)
@@ -3624,22 +3624,24 @@ void t4_get_chan_txrate(struct adapter *
  *	@idx: which filter to configure
  *	@enable: whether to enable or disable the filter
  *
- *	Configures one of the tracing filters available in HW.  If @enable is
- *	%0 @tp is not examined and may be %NULL. The user is responsible to
- *	set the single/multiple trace mode by writing to A_MPS_TRC_CFG register
- *	by using "cxgbtool iface reg reg_addr=val" command. See t4_sniffer/
- *	docs/readme.txt for a complete description of how to setup traceing on
- *	T4.
+ *	Configures one of the tracing filters available in HW.  If @tp is %NULL
+ *	it indicates that the filter is already written in the register and it
+ *	just needs to be enabled or disabled.
  */
-int t4_set_trace_filter(struct adapter *adap, const struct trace_params *tp, int idx,
-			int enable)
+int t4_set_trace_filter(struct adapter *adap, const struct trace_params *tp,
+    int idx, int enable)
 {
 	int i, ofst = idx * 4;
 	u32 data_reg, mask_reg, cfg;
 	u32 multitrc = F_TRCMULTIFILTER;
+	u32 en = is_t4(adap) ? F_TFEN : F_T5_TFEN;
 
-	if (!enable) {
-		t4_write_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + ofst, 0);
+	if (idx < 0 || idx >= NTRACE)
+		return -EINVAL;
+
+	if (tp == NULL || !enable) {
+		t4_set_reg_field(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + ofst, en,
+		    enable ? en : 0);
 		return 0;
 	}
 
@@ -3660,8 +3662,7 @@ int t4_set_trace_filter(struct adapter *
 		 */
 		if (tp->snap_len > ((10 * 1024 / 4) - (2 * 8)))
 			return -EINVAL;		
-	}
-	else {
+	} else {
 		/*
 		 * If multiple tracers are disabled, to avoid deadlocks 
 		 * maximum packet capture size of 9600 bytes is recommended.
@@ -3672,12 +3673,13 @@ int t4_set_trace_filter(struct adapter *
 			return -EINVAL;
 	}
 
-	if (tp->port > 11 || tp->invert > 1 || tp->skip_len > M_TFLENGTH ||
-	    tp->skip_ofst > M_TFOFFSET || tp->min_len > M_TFMINPKTSIZE)
+	if (tp->port > (is_t4(adap) ? 11 : 19) || tp->invert > 1 ||
+	    tp->skip_len > M_TFLENGTH || tp->skip_ofst > M_TFOFFSET ||
+	    tp->min_len > M_TFMINPKTSIZE)
 		return -EINVAL;
 
 	/* stop the tracer we'll be changing */
-	t4_write_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + ofst, 0);
+	t4_set_reg_field(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + ofst, en, 0);
 
 	idx *= (A_MPS_TRC_FILTER1_MATCH - A_MPS_TRC_FILTER0_MATCH);
 	data_reg = A_MPS_TRC_FILTER0_MATCH + idx;
@@ -3691,11 +3693,10 @@ int t4_set_trace_filter(struct adapter *
 		     V_TFCAPTUREMAX(tp->snap_len) |
 		     V_TFMINPKTSIZE(tp->min_len));
 	t4_write_reg(adap, A_MPS_TRC_FILTER_MATCH_CTL_A + ofst,
-		     V_TFOFFSET(tp->skip_ofst) | V_TFLENGTH(tp->skip_len) |
-		     is_t4(adap) ?
-		     V_TFPORT(tp->port) | F_TFEN | V_TFINVERTMATCH(tp->invert) :
-		     V_T5_TFPORT(tp->port) | F_T5_TFEN |
-		     V_T5_TFINVERTMATCH(tp->invert));
+		     V_TFOFFSET(tp->skip_ofst) | V_TFLENGTH(tp->skip_len) | en |
+		     (is_t4(adap) ?
+		     V_TFPORT(tp->port) | V_TFINVERTMATCH(tp->invert) :
+		     V_T5_TFPORT(tp->port) | V_T5_TFINVERTMATCH(tp->invert)));
 
 	return 0;
 }
@@ -3722,15 +3723,16 @@ void t4_get_trace_filter(struct adapter 
 	if (is_t4(adap)) {
 		*enabled = !!(ctla & F_TFEN);
 		tp->port =  G_TFPORT(ctla);
+		tp->invert = !!(ctla & F_TFINVERTMATCH);
 	} else {
 		*enabled = !!(ctla & F_T5_TFEN);
 		tp->port = G_T5_TFPORT(ctla);
+		tp->invert = !!(ctla & F_T5_TFINVERTMATCH);
 	}
 	tp->snap_len = G_TFCAPTUREMAX(ctlb);
 	tp->min_len = G_TFMINPKTSIZE(ctlb);
 	tp->skip_ofst = G_TFOFFSET(ctla);
 	tp->skip_len = G_TFLENGTH(ctla);
-	tp->invert = !!(ctla & F_TFINVERTMATCH);
 
 	ofst = (A_MPS_TRC_FILTER1_MATCH - A_MPS_TRC_FILTER0_MATCH) * idx;
 	data_reg = A_MPS_TRC_FILTER0_MATCH + ofst;

Modified: head/sys/dev/cxgbe/common/t4_hw.h
==============================================================================
--- head/sys/dev/cxgbe/common/t4_hw.h	Fri Jul 26 21:34:09 2013	(r253690)
+++ head/sys/dev/cxgbe/common/t4_hw.h	Fri Jul 26 22:04:11 2013	(r253691)
@@ -45,6 +45,7 @@ enum {
 	NTX_SCHED       = 8,     /* # of HW Tx scheduling queues */
 	PM_NSTATS       = 5,     /* # of PM stats */
 	MBOX_LEN        = 64,    /* mailbox size in bytes */
+	NTRACE          = 4,     /* # of tracing filters */
 	TRACE_LEN       = 112,   /* length of trace data and mask */
 	FILTER_OPT_LEN  = 36,    /* filter tuple width of optional components */
 	NWOL_PAT        = 8,     /* # of WoL patterns */

Modified: head/sys/dev/cxgbe/t4_ioctl.h
==============================================================================
--- head/sys/dev/cxgbe/t4_ioctl.h	Fri Jul 26 21:34:09 2013	(r253690)
+++ head/sys/dev/cxgbe/t4_ioctl.h	Fri Jul 26 22:04:11 2013	(r253691)
@@ -54,6 +54,8 @@ enum {
 	T4_SET_OFLD_POLICY,		/* Set offload policy */
 	T4_SET_SCHED_CLASS,             /* set sched class */
 	T4_SET_SCHED_QUEUE,             /* set queue class */
+	T4_GET_TRACER,			/* get information about a tracer */
+	T4_SET_TRACER,			/* program a tracer */
 };
 
 struct t4_reg {
@@ -226,6 +228,25 @@ struct t4_mem_range {
 	uint32_t *data;
 };
 
+#define T4_TRACE_LEN 112
+struct t4_trace_params {
+	uint32_t data[T4_TRACE_LEN / 4];
+	uint32_t mask[T4_TRACE_LEN / 4];
+	uint16_t snap_len;
+	uint16_t min_len;
+	uint8_t skip_ofst;
+	uint8_t skip_len;
+	uint8_t invert;
+	uint8_t port;
+};
+
+struct t4_tracer {
+	uint8_t idx;
+	uint8_t enabled;
+	uint8_t valid;
+	struct t4_trace_params tp;
+};
+
 #define CHELSIO_T4_GETREG	_IOWR('f', T4_GETREG, struct t4_reg)
 #define CHELSIO_T4_SETREG	_IOW('f', T4_SETREG, struct t4_reg)
 #define CHELSIO_T4_REGDUMP	_IOWR('f', T4_REGDUMP, struct t4_regdump)
@@ -240,4 +261,6 @@ struct t4_mem_range {
 #define CHELSIO_T4_GET_MEM	_IOW('f', T4_GET_MEM, struct t4_mem_range)
 #define CHELSIO_T4_GET_I2C	_IOWR('f', T4_GET_I2C, struct t4_i2c_data)
 #define CHELSIO_T4_CLEAR_STATS	_IOW('f', T4_CLEAR_STATS, uint32_t)
+#define CHELSIO_T4_GET_TRACER	_IOWR('f', T4_GET_TRACER, struct t4_tracer)
+#define CHELSIO_T4_SET_TRACER	_IOW('f', T4_SET_TRACER, struct t4_tracer)
 #endif

Modified: head/sys/dev/cxgbe/t4_main.c
==============================================================================
--- head/sys/dev/cxgbe/t4_main.c	Fri Jul 26 21:34:09 2013	(r253690)
+++ head/sys/dev/cxgbe/t4_main.c	Fri Jul 26 22:04:11 2013	(r253691)
@@ -557,6 +557,11 @@ t4_attach(device_t dev)
 		pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
 	}
 
+	sc->traceq = -1;
+	mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);
+	snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
+	    device_get_nameunit(dev));
+
 	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
 	    device_get_nameunit(dev));
 	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
@@ -588,8 +593,11 @@ t4_attach(device_t dev)
 	for (i = 0; i < nitems(sc->fw_msg_handler); i++)
 		sc->fw_msg_handler[i] = fw_msg_not_handled;
 	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
+	t4_register_cpl_handler(sc, CPL_TRACE_PKT, t4_trace_pkt);
+	t4_register_cpl_handler(sc, CPL_TRACE_PKT_T5, t5_trace_pkt);
 	t4_init_sge_cpl_handlers(sc);
 
+
 	/* Prepare the adapter for operation */
 	rc = -t4_prep_adapter(sc);
 	if (rc != 0) {
@@ -668,6 +676,7 @@ t4_attach(device_t dev)
 		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
 		    device_get_nameunit(dev), i);
 		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
+		sc->chan_map[pi->tx_chan] = i;
 
 		if (is_10G_port(pi) || is_40G_port(pi)) {
 			n10g++;
@@ -916,6 +925,8 @@ t4_detach(device_t dev)
 		mtx_destroy(&sc->tids.ftid_lock);
 	if (mtx_initialized(&sc->sfl_lock))
 		mtx_destroy(&sc->sfl_lock);
+	if (mtx_initialized(&sc->ifp_lock))
+		mtx_destroy(&sc->ifp_lock);
 
 	bzero(sc, sizeof(*sc));
 
@@ -1018,6 +1029,11 @@ cxgbe_detach(device_t dev)
 #endif
 	ADAPTER_UNLOCK(sc);
 
+	if (pi->flags & HAS_TRACEQ) {
+		sc->traceq = -1;	/* cloner should not create ifnet */
+		t4_tracer_port_detach(sc);
+	}
+
 	if (pi->vlan_c)
 		EVENTHANDLER_DEREGISTER(vlan_config, pi->vlan_c);
 
@@ -2887,6 +2903,17 @@ cxgbe_init_synchronized(struct port_info
 		goto done;
 	}
 
+	/*
+	 * The first iq of the first port to come up is used for tracing.
+	 */
+	if (sc->traceq < 0) {
+		sc->traceq = sc->sge.rxq[pi->first_rxq].iq.abs_id;
+		t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
+		    A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
+		    V_QUEUENUMBER(sc->traceq));
+		pi->flags |= HAS_TRACEQ;
+	}
+
 	/* all ok */
 	setbit(&sc->open_device_map, pi->port_id);
 	PORT_LOCK(pi);
@@ -7414,6 +7441,12 @@ t4_ioctl(struct cdev *dev, unsigned long
 		}
 		break;
 	}
+	case CHELSIO_T4_GET_TRACER:
+		rc = t4_get_tracer(sc, (struct t4_tracer *)data);
+		break;
+	case CHELSIO_T4_SET_TRACER:
+		rc = t4_set_tracer(sc, (struct t4_tracer *)data);
+		break;
 	default:
 		rc = EINVAL;
 	}
@@ -7650,12 +7683,14 @@ mod_event(module_t mod, int cmd, void *a
 		mtx_init(&t4_uld_list_lock, "T4 ULDs", 0, MTX_DEF);
 		SLIST_INIT(&t4_uld_list);
 #endif
+		t4_tracer_modload();
 		tweak_tunables();
 		break;
 
 	case MOD_UNLOAD:
 		if (atomic_fetchadd_int(&loaded, -1) > 1)
 			break;
+		t4_tracer_modunload();
 #ifdef TCP_OFFLOAD
 		mtx_lock(&t4_uld_list_lock);
 		if (!SLIST_EMPTY(&t4_uld_list)) {

Modified: head/sys/dev/cxgbe/t4_sge.c
==============================================================================
--- head/sys/dev/cxgbe/t4_sge.c	Fri Jul 26 21:34:09 2013	(r253690)
+++ head/sys/dev/cxgbe/t4_sge.c	Fri Jul 26 22:04:11 2013	(r253691)
@@ -276,7 +276,6 @@ t4_init_sge_cpl_handlers(struct adapter 
 	t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
 	t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
 	t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
-
 	t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
 }
 

Added: head/sys/dev/cxgbe/t4_tracer.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/dev/cxgbe/t4_tracer.c	Fri Jul 26 22:04:11 2013	(r253691)
@@ -0,0 +1,519 @@
+/*-
+ * Copyright (c) 2013 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/types.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+
+#include "common/common.h"
+#include "common/t4_msg.h"
+#include "common/t4_regs.h"
+#include "t4_ioctl.h"
+
+/*
+ * Locking notes
+ * =============
+ *
+ * An interface cloner is registered during mod_load and it can be used to
+ * create or destroy the tracing ifnet for an adapter at any time.  It is
+ * possible for the cloned interface to outlive the adapter (adapter disappears
+ * in t4_detach but the tracing ifnet may live till mod_unload when removal of
+ * the cloner finally destroys any remaining cloned interfaces).  When tracing
+ * filters are active, this ifnet is also receiving data.  There are potential
+ * bad races between ifnet create, ifnet destroy, ifnet rx, ifnet ioctl,
+ * cxgbe_detach/t4_detach, mod_unload.
+ *
+ * a) The driver selects an iq for tracing (sc->traceq) inside a synch op.  The
+ *    iq is destroyed inside a synch op too (and sc->traceq updated).
+ * b) The cloner looks for an adapter that matches the name of the ifnet it's
+ *    been asked to create, starts a synch op on that adapter, and proceeds only
+ *    if the adapter has a tracing iq.
+ * c) The cloned ifnet and the adapter are coupled to each other via
+ *    ifp->if_softc and sc->ifp.  These can be modified only with the global
+ *    t4_trace_lock sx as well as the sc->ifp_lock mutex held.  Holding either
+ *    of these will prevent any change.
+ *
+ * The order in which the locks involved should be acquired is:
+ * t4_list_lock
+ * adapter lock
+ * (begin synch op and let go of the above two)
+ * t4_trace_lock
+ * sc->ifp_lock
+ */
+
+/*
+ * Global sx lock.  Per the locking notes above, ifp->if_softc and sc->ifp may
+ * be modified only with this lock and sc->ifp_lock both held.
+ */
+static struct sx t4_trace_lock;
+/* Name the cloner is registered under; also the if_dname of tracing ifnets. */
+static const char *t4_cloner_name = "tXnex";
+/* Cloner handle; set in t4_tracer_modload, detached in t4_tracer_modunload. */
+static struct if_clone *t4_cloner;
+
+/* tracer ifnet routines.  mostly no-ops. */
+static void tracer_init(void *);
+static int tracer_ioctl(struct ifnet *, unsigned long, caddr_t);
+static int tracer_transmit(struct ifnet *, struct mbuf *);
+static void tracer_qflush(struct ifnet *);
+static int tracer_media_change(struct ifnet *);
+static void tracer_media_status(struct ifnet *, struct ifmediareq *);
+
+/*
+ * match name (request/response).  Filled in by the caller, handed to
+ * match_name via t4_iterate, and updated by match_name when an adapter's
+ * nameunit equals name.
+ */
+struct match_rr {
+	const char *name;	/* request: nameunit to look for */
+	int lock;	/* set to 1 to have sc returned in a synchronized op. */
+	struct adapter *sc;	/* response: matching adapter (caller presets NULL) */
+	int rc;		/* response: begin_synchronized_op result, or 0 */
+};
+
+/*
+ * t4_iterate callback.  arg is a struct match_rr; when this adapter's nameunit
+ * equals the requested name the adapter is recorded in the request and, if the
+ * caller asked for it, a synchronized op is begun on the adapter.  The result
+ * of that attempt (or 0 when none was requested) is left in the request's rc.
+ */
+static void
+match_name(struct adapter *sc, void *arg)
+{
+	struct match_rr *req = arg;
+	const char *nameunit = device_get_nameunit(sc->dev);
+
+	if (strcmp(nameunit, req->name) == 0) {
+		KASSERT(req->sc == NULL,
+		    ("%s: multiple matches (%p, %p) for %s", __func__,
+		    req->sc, sc, req->name));
+
+		req->sc = sc;
+		req->rc = req->lock ?
+		    begin_synchronized_op(sc, NULL, 0, "t4clon") : 0;
+	}
+}
+
+/*
+ * Cloner match method: returns non-zero iff some adapter's nameunit equals
+ * name.  No synchronized op is begun on the adapter (lock = 0).
+ */
+static int
+t4_cloner_match(struct if_clone *ifc, const char *name)
+{
+	struct match_rr req;
+
+	req.sc = NULL;
+	req.lock = 0;
+	req.name = name;
+	t4_iterate(match_name, &req);
+
+	return (req.sc != NULL);
+}
+
+/*
+ * Cloner create method.  Creates the tracing ifnet for the adapter whose
+ * nameunit equals name and couples the two via ifp->if_softc and sc->ifp.
+ *
+ * Returns 0 on success.  Otherwise returns the value left in mrr.rc when no
+ * adapter matched (ENOENT) or begin_synchronized_op failed, EEXIST when the
+ * adapter already has a tracing ifnet, EAGAIN when no tracing iq has been
+ * selected (sc->traceq < 0), the error from ifc_alloc_unit, or ENOMEM when
+ * if_alloc fails.
+ */
+static int
+t4_cloner_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+	struct match_rr mrr;
+	struct adapter *sc;
+	struct ifnet *ifp;
+	int rc, unit;
+	/* All-zero link-level address handed to ether_ifattach. */
+	const uint8_t lla[ETHER_ADDR_LEN] = {0, 0, 0, 0, 0, 0};
+
+	/* lock = 1: match_name begins a synchronized op on the matching adapter. */
+	mrr.name = name;
+	mrr.lock = 1;
+	mrr.sc = NULL;
+	mrr.rc = ENOENT;	/* stays ENOENT if no adapter matches */
+	t4_iterate(match_name, &mrr);
+
+	if (mrr.rc != 0)
+		return (mrr.rc);
+	sc = mrr.sc;
+
+	KASSERT(sc != NULL, ("%s: name (%s) matched but softc is NULL",
+	    __func__, name));
+	ASSERT_SYNCHRONIZED_OP(sc);
+
+	sx_xlock(&t4_trace_lock);
+
+	/* Only one tracing ifnet per adapter. */
+	if (sc->ifp != NULL) {
+		rc = EEXIST;
+		goto done;
+	}
+	/* No iq has been designated for tracing yet. */
+	if (sc->traceq < 0) {
+		rc = EAGAIN;
+		goto done;
+	}
+
+
+	unit = -1;
+	rc = ifc_alloc_unit(ifc, &unit);
+	if (rc != 0)
+		goto done;
+
+	ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		ifc_free_unit(ifc, unit);
+		rc = ENOMEM;
+		goto done;
+	}
+
+	/* Note that if_xname is not <if_dname><if_dunit>. */
+	strlcpy(ifp->if_xname, name, sizeof(ifp->if_xname));
+	ifp->if_dname = t4_cloner_name;
+	ifp->if_dunit = unit;
+	ifp->if_init = tracer_init;
+	ifp->if_flags = IFF_SIMPLEX | IFF_DRV_RUNNING;
+	ifp->if_ioctl = tracer_ioctl;
+	ifp->if_transmit = tracer_transmit;
+	ifp->if_qflush = tracer_qflush;
+	ifp->if_capabilities = IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
+	ifmedia_init(&sc->media, IFM_IMASK, tracer_media_change,
+	    tracer_media_status);
+	ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | IFM_NONE, 0, NULL);
+	ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | IFM_NONE);
+	ether_ifattach(ifp, lla);
+	if_up(ifp);
+
+	/* Couple ifnet and adapter with both t4_trace_lock and ifp_lock held. */
+	mtx_lock(&sc->ifp_lock);
+	ifp->if_softc = sc;
+	sc->ifp = ifp;
+	mtx_unlock(&sc->ifp_lock);
+done:
+	/* Every path reaching here holds t4_trace_lock inside the synch op. */
+	sx_xunlock(&t4_trace_lock);
+	end_synchronized_op(sc, 0);
+	return (rc);
+}
+
+/*
+ * Cloner destroy method.  Breaks the ifnet <-> adapter coupling (if the
+ * adapter is still attached to the ifnet), then detaches and frees the ifnet
+ * and releases its cloner unit.  Always returns 0.
+ */
+static int
+t4_cloner_destroy(struct if_clone *ifc, struct ifnet *ifp)
+{
+	struct adapter *sc;
+	int dunit;
+
+	/* Remember the unit now; ifp is freed before the unit is released. */
+	dunit = ifp->if_dunit;
+
+	sx_xlock(&t4_trace_lock);
+
+	sc = ifp->if_softc;
+	if (sc != NULL) {
+		/* Decouple with both t4_trace_lock and ifp_lock held. */
+		mtx_lock(&sc->ifp_lock);
+		sc->ifp = NULL;
+		ifp->if_softc = NULL;
+		mtx_unlock(&sc->ifp_lock);
+
+		ifmedia_removeall(&sc->media);
+	}
+
+	ether_ifdetach(ifp);
+	if_free(ifp);
+	ifc_free_unit(ifc, dunit);
+
+	sx_xunlock(&t4_trace_lock);
+
+	return (0);
+}
+
+/*
+ * Module load hook: initializes the global tracer lock and registers the
+ * interface cloner used to create and destroy tracing ifnets.
+ */
+void
+t4_tracer_modload(void)
+{
+
+	sx_init(&t4_trace_lock, "T4/T5 tracer lock");
+	t4_cloner = if_clone_advanced(t4_cloner_name, 0, t4_cloner_match,
+	    t4_cloner_create, t4_cloner_destroy);
+}
+
+/*
+ * Module unload hook: detaches the interface cloner (if it was registered)
+ * and destroys the global tracer lock.
+ */
+void
+t4_tracer_modunload(void)
+{
+
+	if (t4_cloner != NULL) {
+		/*
+		 * The module is being unloaded so the nexus drivers have
+		 * detached.  The tracing interfaces can not outlive the nexus
+		 * (ifp->if_softc is the nexus) and must have been destroyed
+		 * already.  XXX: but if_clone is opaque to us and we can't
+		 * assert LIST_EMPTY(&t4_cloner->ifc_iflist) at this time.
+		 */
+		if_clone_detach(t4_cloner);
+	}
+	sx_destroy(&t4_trace_lock);
+}
+
+/*
+ * Severs the link between the adapter and its tracing ifnet (if one exists)
+ * and removes all entries from the adapter's tracer ifmedia.  The ifnet itself
+ * is not destroyed here.
+ */
+void
+t4_tracer_port_detach(struct adapter *sc)
+{
+	struct ifnet *tifp;
+
+	sx_xlock(&t4_trace_lock);
+
+	/* sc->ifp cannot change while t4_trace_lock is held. */
+	tifp = sc->ifp;
+	if (tifp != NULL) {
+		mtx_lock(&sc->ifp_lock);
+		tifp->if_softc = NULL;
+		sc->ifp = NULL;
+		mtx_unlock(&sc->ifp_lock);
+	}
+	ifmedia_removeall(&sc->media);
+
+	sx_xunlock(&t4_trace_lock);
+}
+
+/*
+ * Reports the first valid tracer at index >= t->idx.
+ *
+ * On entry only t->idx is consulted.  If a valid tracer is found, t->idx is
+ * set to its index, t->valid to 1, t->enabled to its current state, and t->tp
+ * is filled from the hardware filter (with the channel converted back to a
+ * port index for values below 8).  If there is none, or t->idx >= NTRACE,
+ * t->idx is set to 0xff and t->enabled and t->valid to 0.
+ *
+ * Returns 0 in all of the above cases; a non-zero value is returned only when
+ * begin_synchronized_op fails.
+ */
+int
+t4_get_tracer(struct adapter *sc, struct t4_tracer *t)
+{
+	int rc, i, enabled;
+	struct trace_params tp;
+
+	if (t->idx >= NTRACE) {
+		t->idx = 0xff;
+		t->enabled = 0;
+		t->valid = 0;
+		return (0);
+	}
+
+	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
+	    "t4gett");
+	if (rc)
+		return (rc);
+
+	for (i = t->idx; i < NTRACE; i++) {
+		/*
+		 * Test the candidate index i, not the starting index, so the
+		 * search actually advances past tracers that are not valid.
+		 */
+		if (isset(&sc->tracer_valid, i)) {
+			t4_get_trace_filter(sc, &tp, i, &enabled);
+			t->idx = i;
+			t->enabled = enabled;
+			t->valid = 1;
+			memcpy(&t->tp.data[0], &tp.data[0], sizeof(t->tp.data));
+			memcpy(&t->tp.mask[0], &tp.mask[0], sizeof(t->tp.mask));
+			t->tp.snap_len = tp.snap_len;
+			t->tp.min_len = tp.min_len;
+			t->tp.skip_ofst = tp.skip_ofst;
+			t->tp.skip_len = tp.skip_len;
+			t->tp.invert = tp.invert;
+
+			/* convert channel to port iff 0 <= port < 8. */
+			if (tp.port < 4)
+				t->tp.port = sc->chan_map[tp.port];
+			else if (tp.port < 8)
+				t->tp.port = sc->chan_map[tp.port - 4] + 4;
+			else
+				t->tp.port = tp.port;
+
+			goto done;
+		}
+	}
+
+	/* No valid tracer at or after the requested index. */
+	t->idx = 0xff;
+	t->enabled = 0;
+	t->valid = 0;
+done:
+	end_synchronized_op(sc, LOCK_HELD);
+
+	return (rc);
+}
+
+/*
+ * Programs, enables, or disables the tracer at t->idx.
+ *
+ * With t->valid == 0 the request only enables/disables a tracer that was
+ * programmed earlier (EINVAL if the tracer at that index is not valid).  With
+ * t->valid != 0 the parameters in t->tp are range-checked, the port is
+ * converted to a channel for values below 8, and the filter is written to the
+ * hardware.  On success the tracer_valid/tracer_enabled bitmaps are updated
+ * and F_TRCEN in A_MPS_TRC_CFG is set when the first tracer is enabled and
+ * cleared when the last one is disabled.
+ *
+ * Returns 0 on success, EINVAL for a bad index or bad parameters, the error
+ * from begin_synchronized_op, or the (negated) error from
+ * t4_set_trace_filter.
+ */
+int
+t4_set_tracer(struct adapter *sc, struct t4_tracer *t)
+{
+	int rc;
+	/* NULL tpp means "filter already in the registers, just toggle it". */
+	struct trace_params tp, *tpp = NULL;
+
+	if (t->idx >= NTRACE)
+		return (EINVAL);
+
+	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
+	    "t4sett");
+	if (rc)
+		return (rc);
+
+	/*
+	 * If no tracing filter is specified this time then check if the filter
+	 * at the index is valid anyway because it was set previously.  If so
+	 * then this is a legitimate enable/disable operation.
+	 */
+	if (t->valid == 0) {
+		if (!isset(&sc->tracer_valid, t->idx))
+			rc = EINVAL;
+		goto done;
+	}
+
+	if (t->tp.port > 19 || t->tp.snap_len > 9600 ||
+	    t->tp.min_len > M_TFMINPKTSIZE || t->tp.skip_len > M_TFLENGTH ||
+	    t->tp.skip_ofst > M_TFOFFSET) {
+		rc = EINVAL;
+		goto done;
+	}
+
+	memcpy(&tp.data[0], &t->tp.data[0], sizeof(tp.data));
+	memcpy(&tp.mask[0], &t->tp.mask[0], sizeof(tp.mask));
+	tp.snap_len = t->tp.snap_len;
+	tp.min_len = t->tp.min_len;
+	tp.skip_ofst = t->tp.skip_ofst;
+	tp.skip_len = t->tp.skip_len;
+	tp.invert = !!t->tp.invert;
+
+	/* convert port to channel iff 0 <= port < 8. */
+	if (t->tp.port < 4) {
+		if (sc->port[t->tp.port] == NULL) {
+			rc = EINVAL;
+			goto done;
+		}
+		tp.port = sc->port[t->tp.port]->tx_chan;
+	} else if (t->tp.port < 8) {
+		if (sc->port[t->tp.port - 4] == NULL) {
+			rc = EINVAL;
+			goto done;
+		}
+		tp.port = sc->port[t->tp.port - 4]->tx_chan + 4;
+	} else {
+		/*
+		 * Values 8-19 are handed to the hardware unchanged, mirroring
+		 * t4_get_tracer.  Without this tp.port would be passed to
+		 * t4_set_trace_filter uninitialized.
+		 */
+		tp.port = t->tp.port;
+	}
+	tpp = &tp;
+done:
+	if (rc == 0) {
+		rc = -t4_set_trace_filter(sc, tpp, t->idx, t->enabled);
+		if (rc == 0) {
+			if (t->enabled) {
+				setbit(&sc->tracer_valid, t->idx);
+				/* First tracer to be enabled: turn tracing on. */
+				if (sc->tracer_enabled == 0) {
+					t4_set_reg_field(sc, A_MPS_TRC_CFG,
+					    F_TRCEN, F_TRCEN);
+				}
+				setbit(&sc->tracer_enabled, t->idx);
+			} else {
+				clrbit(&sc->tracer_enabled, t->idx);
+				/* Last tracer disabled: turn tracing off. */
+				if (sc->tracer_enabled == 0) {
+					t4_set_reg_field(sc, A_MPS_TRC_CFG,
+					    F_TRCEN, 0);
+				}
+			}
+		}
+	}
+	end_synchronized_op(sc, LOCK_HELD);
+
+	return (rc);
+}
+
+/*
+ * Handler registered for CPL_TRACE_PKT.  If the adapter currently has a
+ * tracing ifnet, the cpl_trace_pkt header is trimmed from the front of the
+ * mbuf and the frame is tapped to that ifnet's bpf listeners.  The mbuf is
+ * always freed here.  Always returns 0.
+ */
+int
+t4_trace_pkt(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+{
+	struct adapter *sc = iq->adapter;
+	struct ifnet *tifp;
+
+	KASSERT(m != NULL, ("%s: no payload with opcode %02x", __func__,
+	    rss->opcode));
+
+	/* ifp_lock keeps sc->ifp from changing while the frame is tapped. */
+	mtx_lock(&sc->ifp_lock);
+	tifp = sc->ifp;
+	if (tifp != NULL) {
+		m_adj(m, sizeof(struct cpl_trace_pkt));
+		m->m_pkthdr.rcvif = tifp;
+		ETHER_BPF_MTAP(tifp, m);
+	}
+	mtx_unlock(&sc->ifp_lock);
+
+	m_freem(m);
+
+	return (0);
+}
+
+/*
+ * Handler registered for CPL_TRACE_PKT_T5.  Same as t4_trace_pkt except that
+ * the header trimmed from the mbuf is a cpl_t5_trace_pkt.  The mbuf is always
+ * freed here.  Always returns 0.
+ */
+int
+t5_trace_pkt(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+{
+	struct adapter *sc = iq->adapter;
+	struct ifnet *tifp;
+
+	KASSERT(m != NULL, ("%s: no payload with opcode %02x", __func__,
+	    rss->opcode));
+
+	/* ifp_lock keeps sc->ifp from changing while the frame is tapped. */
+	mtx_lock(&sc->ifp_lock);
+	tifp = sc->ifp;
+	if (tifp != NULL) {
+		m_adj(m, sizeof(struct cpl_t5_trace_pkt));
+		m->m_pkthdr.rcvif = tifp;
+		ETHER_BPF_MTAP(tifp, m);
+	}
+	mtx_unlock(&sc->ifp_lock);
+
+	m_freem(m);
+
+	return (0);
+}
+
+
+/* if_init method of the tracing ifnet.  Intentionally does nothing. */
+static void
+tracer_init(void *arg)
+{
+
+}
+
+/*
+ * if_ioctl method of the tracing ifnet.  MTU, flags, multicast, and capability
+ * requests are accepted and ignored.  Media requests are forwarded to
+ * ifmedia_ioctl on the adapter's tracer media (EIO if the ifnet is no longer
+ * coupled to an adapter).  Everything else goes to ether_ioctl.
+ */
+static int
+tracer_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
+{
+	struct adapter *sc;
+	int error;
+
+	switch (cmd) {
+	case SIOCSIFMTU:
+	case SIOCSIFFLAGS:
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+	case SIOCSIFCAP:
+		error = 0;
+		break;
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		/* if_softc cannot change while t4_trace_lock is held. */
+		sx_xlock(&t4_trace_lock);
+		sc = ifp->if_softc;
+		if (sc != NULL) {
+			error = ifmedia_ioctl(ifp, (struct ifreq *)data,
+			    &sc->media, cmd);
+		} else
+			error = EIO;
+		sx_xunlock(&t4_trace_lock);
+		break;
+	default:
+		error = ether_ioctl(ifp, cmd, data);
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * if_transmit method of the tracing ifnet.  The mbuf is simply freed and
+ * success (0) is reported.
+ */
+static int
+tracer_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+	const int rc = 0;
+
+	m_freem(m);
+
+	return (rc);
+}
+
+/* if_qflush method of the tracing ifnet.  Intentionally does nothing. */
+static void
+tracer_qflush(struct ifnet *ifp)
+{
+
+}
+
+/* ifmedia change callback for the tracing ifnet; always fails with EOPNOTSUPP. */
+static int
+tracer_media_change(struct ifnet *ifp)
+{
+	const int rc = EOPNOTSUPP;
+
+	return (rc);
+}
+
+/*
+ * ifmedia status callback for the tracing ifnet; unconditionally sets
+ * ifm_status to IFM_AVALID | IFM_ACTIVE.
+ */
+static void
+tracer_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+	const int status = IFM_AVALID | IFM_ACTIVE;
+
+	ifmr->ifm_status = status;
+}

Modified: head/sys/modules/cxgbe/if_cxgbe/Makefile
==============================================================================
--- head/sys/modules/cxgbe/if_cxgbe/Makefile	Fri Jul 26 21:34:09 2013	(r253690)
+++ head/sys/modules/cxgbe/if_cxgbe/Makefile	Fri Jul 26 22:04:11 2013	(r253691)
@@ -8,7 +8,7 @@ CXGBE = ${.CURDIR}/../../../dev/cxgbe
 .PATH: ${CXGBE} ${CXGBE}/common
 
 KMOD = if_cxgbe
-SRCS = t4_main.c t4_sge.c t4_l2t.c
+SRCS = t4_main.c t4_sge.c t4_l2t.c t4_tracer.c
 SRCS+= t4_hw.c
 SRCS+= device_if.h bus_if.h pci_if.h
 SRCS+= opt_inet.h opt_inet6.h

Modified: head/tools/tools/cxgbetool/cxgbetool.c
==============================================================================
--- head/tools/tools/cxgbetool/cxgbetool.c	Fri Jul 26 21:34:09 2013	(r253690)
+++ head/tools/tools/cxgbetool/cxgbetool.c	Fri Jul 26 22:04:11 2013	(r253691)
@@ -98,6 +98,9 @@ usage(FILE *fp)
 	    "\tregdump [<module>] ...              dump registers\n"
 	    "\tstdio                               interactive mode\n"
 	    "\ttcb <tid>                           read TCB\n"
+	    "\ttracer <idx> tx<n>|rx<n>            set and enable a tracer)\n"
+	    "\ttracer <idx> disable|enable         disable or enable a tracer\n"
+	    "\ttracer list                         list all tracers\n"
 	    );
 }
 
@@ -1658,6 +1661,203 @@ clearstats(int argc, const char *argv[])
 }
 
 static int
+show_tracers(void)
+{
+	struct t4_tracer t;
+	char *s;
+	int rc, port_idx, i;
+	long long val;
+
+	/* Magic values: MPS_TRC_CFG = 0x9800. MPS_TRC_CFG[1:1] = TrcEn */
+	rc = read_reg(0x9800, 4, &val);
+	if (rc != 0)
+		return (rc);
+	printf("tracing is %s\n", val & 2 ? "ENABLED" : "DISABLED");
+
+	t.idx = 0;
+	for (t.idx = 0; ; t.idx++) {
+		rc = doit(CHELSIO_T4_GET_TRACER, &t);
+		if (rc != 0 || t.idx == 0xff)
+			break;
+
+		if (t.tp.port < 4) {
+			s = "Rx";
+			port_idx = t.tp.port;
+		} else if (t.tp.port < 8) {
+			s = "Tx";
+			port_idx = t.tp.port - 4;
+		} else if (t.tp.port < 12) {
+			s = "loopback";
+			port_idx = t.tp.port - 8;
+		} else if (t.tp.port < 16) {
+			s = "MPS Rx";
+			port_idx = t.tp.port - 12;
+		} else if (t.tp.port < 20) {
+			s = "MPS Tx";
+			port_idx = t.tp.port - 16;
+		} else {
+			s = "unknown";
+			port_idx = t.tp.port;
+		}
+
+		printf("\ntracer %u (currently %s) captures ", t.idx,
+		    t.enabled ? "ENABLED" : "DISABLED");
+		if (t.tp.port < 8)
+			printf("port %u %s, ", port_idx, s);
+		else
+			printf("%s %u, ", s, port_idx);
+		printf("snap length: %u, min length: %u\n", t.tp.snap_len,
+		    t.tp.min_len);
+		printf("packets captured %smatch filter\n",
+		    t.tp.invert ? "do not " : "");
+		if (t.tp.skip_ofst) {
+			printf("filter pattern: ");
+			for (i = 0; i < t.tp.skip_ofst * 2; i += 2)
+				printf("%08x%08x", t.tp.data[i],
+				    t.tp.data[i + 1]);
+			printf("/");
+			for (i = 0; i < t.tp.skip_ofst * 2; i += 2)
+				printf("%08x%08x", t.tp.mask[i],
+				    t.tp.mask[i + 1]);
+			printf("@0\n");
+		}
+		printf("filter pattern: ");
+		for (i = t.tp.skip_ofst * 2; i < T4_TRACE_LEN / 4; i += 2)
+			printf("%08x%08x", t.tp.data[i], t.tp.data[i + 1]);
+		printf("/");
+		for (i = t.tp.skip_ofst * 2; i < T4_TRACE_LEN / 4; i += 2)
+			printf("%08x%08x", t.tp.mask[i], t.tp.mask[i + 1]);
+		printf("@%u\n", (t.tp.skip_ofst + t.tp.skip_len) * 8);
+	}
+
+	return (rc);
+}
+
+/*
+ * Enables (enabled != 0) or disables the tracer at idx without supplying a
+ * new filter: valid is left at 0 so the driver treats the request as a pure
+ * enable/disable.  Returns the result of the CHELSIO_T4_SET_TRACER ioctl.
+ */
+static int
+tracer_onoff(uint8_t idx, int enabled)
+{
+	struct t4_tracer t;
+
+	/* Zero everything (incl. tp) so no stack garbage goes to the ioctl. */
+	bzero(&t, sizeof(t));
+	t.idx = idx;
+	t.enabled = enabled;
+	t.valid = 0;
+
+	return (doit(CHELSIO_T4_SET_TRACER, &t));
+}
+
+/*
+ * Best-effort creation of the tracing ifnet: runs "/sbin/ifconfig <nexus>
+ * create" in a vfork'ed child with stderr closed and an empty environment.
+ * The parent neither checks for vfork failure nor collects the child's exit
+ * status, so failures are silent.
+ */
+static void
+create_tracing_ifnet(void)
+{
+	char *cmd[] = {
+		"/sbin/ifconfig", __DECONST(char *, nexus), "create", NULL
+	};
+	char *env[] = {NULL};
+
+	if (vfork() == 0) {
+		close(STDERR_FILENO);
+		execve(cmd[0], cmd, env);
+		/* Only reached if execve failed. */
+		_exit(0);
+	}
+}
+
+/*
+ * XXX: Allow user to specify snaplen, minlen, and pattern (including inverted
+ * matching).  Right now this is a quick-n-dirty implementation that traces the
+ * first 128B of all tx or rx on a port
+ */
+static int
+set_tracer(uint8_t idx, int argc, const char *argv[])
+{
+	struct t4_tracer t;
+	int len, port;
+
+	bzero(&t, sizeof (t));
+	t.idx = idx;
+	t.enabled = 1;
+	t.valid = 1;
+
+	if (argc != 1) {
+		warnx("must specify tx<n> or rx<n>.");
+		return (EINVAL);
+	}
+
+	len = strlen(argv[0]);
+	if (len != 3) {
+		warnx("argument must be 3 characters (tx<n> or rx<n>)");
+		return (EINVAL);
+	}
+
+	if (strncmp(argv[0], "tx", 2) == 0) {
+		port = argv[0][2] - '0';
+		if (port < 0 || port > 3) {
+			warnx("'%c' in %s is invalid", argv[0][2], argv[0]);
+			return (EINVAL);
+		}
+		port += 4;
+	} else if (strncmp(argv[0], "rx", 2) == 0) {
+		port = argv[0][2] - '0';

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list