svn commit: r366443 - in stable/12: share/man/man4 sys/dev/cxgbe

Navdeep Parhar <np@FreeBSD.org>
Mon Oct 5 09:03:18 UTC 2020


Author: np
Date: Mon Oct  5 09:03:17 2020
New Revision: 366443
URL: https://svnweb.freebsd.org/changeset/base/366443

Log:
  MFC r365993:
  
  cxgbe(4): let the PF driver use VM work requests for transmit.
  
  This allows the PF interfaces to communicate with the VF interfaces over
  the internal switch in the ASIC.  Fix the GL limits for VM work requests
  while here.
  
  Sponsored by:	Chelsio Communications
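  
  For anyone trying this out, a minimal usage sketch based on the tunable
  and sysctl documented in the cxgbe.4 change below (the port device and
  instance names are placeholders):
  
    # Enable for all PF ports at boot, in /boot/loader.conf:
    hw.cxgbe.tx_vm_wr="1"
  
    # Or flip a single port at runtime; the interface must be
    # administratively down first:
    ifconfig <port> down
    sysctl dev.<port>.X.tx_vm_wr=1
    ifconfig <port> up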

Modified:
  stable/12/share/man/man4/cxgbe.4
  stable/12/sys/dev/cxgbe/adapter.h
  stable/12/sys/dev/cxgbe/t4_main.c
  stable/12/sys/dev/cxgbe/t4_sge.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/share/man/man4/cxgbe.4
==============================================================================
--- stable/12/share/man/man4/cxgbe.4	Mon Oct  5 08:51:03 2020	(r366442)
+++ stable/12/share/man/man4/cxgbe.4	Mon Oct  5 09:03:17 2020	(r366443)
@@ -351,6 +351,17 @@ This tunable is for specialized applications only and 
 normal operation.
 The capabilities for which hardware resources have been reserved are listed in
 dev.<nexus>.X.*caps sysctls.
+.It Va hw.cxgbe.tx_vm_wr
+Setting this to 1 instructs the driver to use VM work requests to transmit data.
+This lets PF interfaces transmit frames to VF interfaces over the internal
+switch in the ASIC.
+Note that the
+.Xr cxgbev 4
+VF driver always uses VM work requests and is not affected by this tunable.
+The default value is 0 and should be changed only if PF and VF interfaces need
+to communicate with each other.
+Different interfaces can be assigned different values using the
+dev.<port>.X.tx_vm_wr sysctl when the interface is administratively down.
 .El
 .Sh SUPPORT
 For general information and support,

Modified: stable/12/sys/dev/cxgbe/adapter.h
==============================================================================
--- stable/12/sys/dev/cxgbe/adapter.h	Mon Oct  5 08:51:03 2020	(r366442)
+++ stable/12/sys/dev/cxgbe/adapter.h	Mon Oct  5 09:03:17 2020	(r366443)
@@ -120,6 +120,8 @@ enum {
 	SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */
 	TX_SGL_SEGS = 39,
 	TX_SGL_SEGS_TSO = 38,
+	TX_SGL_SEGS_VM = 38,
+	TX_SGL_SEGS_VM_TSO = 37,
 	TX_SGL_SEGS_EO_TSO = 30,	/* XXX: lower for IPv6. */
 	TX_SGL_SEGS_VXLAN_TSO = 37,
 	TX_WR_FLITS = SGE_MAX_WR_LEN / 8
@@ -174,6 +176,7 @@ enum {
 	DOOMED		= (1 << 0),
 	VI_INIT_DONE	= (1 << 1),
 	VI_SYSCTL_CTX	= (1 << 2),
+	TX_USES_VM_WR	= (1 << 3),
 
 	/* adapter debug_flags */
 	DF_DUMP_MBOX		= (1 << 0),	/* Log all mbox cmd/rpl. */
@@ -1238,7 +1241,7 @@ void t4_intr_evt(void *);
 void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *);
 void t4_update_fl_bufsize(struct ifnet *);
 struct mbuf *alloc_wr_mbuf(int, int);
-int parse_pkt(struct adapter *, struct mbuf **);
+int parse_pkt(struct mbuf **, bool);
 void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *);
 void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *);
 int tnl_cong(struct port_info *, int);

Modified: stable/12/sys/dev/cxgbe/t4_main.c
==============================================================================
--- stable/12/sys/dev/cxgbe/t4_main.c	Mon Oct  5 08:51:03 2020	(r366442)
+++ stable/12/sys/dev/cxgbe/t4_main.c	Mon Oct  5 09:03:17 2020	(r366443)
@@ -575,6 +575,10 @@ static int t4_panic_on_fatal_err = 0;
 SYSCTL_INT(_hw_cxgbe, OID_AUTO, panic_on_fatal_err, CTLFLAG_RDTUN,
     &t4_panic_on_fatal_err, 0, "panic on fatal errors");
 
+static int t4_tx_vm_wr = 0;
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_vm_wr, CTLFLAG_RWTUN, &t4_tx_vm_wr, 0,
+    "Use VM work requests to transmit packets.");
+
 #ifdef TCP_OFFLOAD
 /*
  * TOE tunables.
@@ -655,6 +659,7 @@ static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS);
 static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS);
 static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
 static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
+static int sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS);
 static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
 static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
 static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
@@ -1669,6 +1674,8 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
 
 	vi->xact_addr_filt = -1;
 	callout_init(&vi->tick, 1);
+	if (sc->flags & IS_VF || t4_tx_vm_wr != 0)
+		vi->flags |= TX_USES_VM_WR;
 
 	/* Allocate an ifnet and set it up */
 	ifp = if_alloc(IFT_ETHER);
@@ -1718,7 +1725,10 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
 #endif
 
 	ifp->if_hw_tsomax = IP_MAXPACKET;
-	ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
+	if (vi->flags & TX_USES_VM_WR)
+		ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
+	else
+		ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
 #ifdef RATELIMIT
 	if (is_ethoffload(sc) && vi->nofldtxq != 0)
 		ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
@@ -2096,7 +2106,7 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct vi_info *vi = ifp->if_softc;
 	struct port_info *pi = vi->pi;
-	struct adapter *sc = pi->adapter;
+	struct adapter *sc;
 	struct sge_txq *txq;
 	void *items[1];
 	int rc;
@@ -2109,7 +2119,7 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
 		return (ENETDOWN);
 	}
 
-	rc = parse_pkt(sc, &m);
+	rc = parse_pkt(&m, vi->flags & TX_USES_VM_WR);
 	if (__predict_false(rc != 0)) {
 		MPASS(m == NULL);			/* was freed already */
 		atomic_add_int(&pi->tx_parse_error, 1);	/* rare, atomic is ok */
@@ -2128,6 +2138,7 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
 #endif
 
 	/* Select a txq. */
+	sc = vi->adapter;
 	txq = &sc->sge.txq[vi->first_txq];
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 		txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
@@ -6499,6 +6510,16 @@ vi_sysctls(struct vi_info *vi)
 		    "Reserve queue 0 for non-flowid packets");
 	}
 
+	if (vi->adapter->flags & IS_VF) {
+		MPASS(vi->flags & TX_USES_VM_WR);
+		SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_vm_wr", CTLFLAG_RD,
+		    NULL, 1, "use VM work requests for transmit");
+	} else {
+		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_vm_wr",
+		    CTLTYPE_INT | CTLFLAG_RW, vi, 0,
+	    sysctl_tx_vm_wr, "I", "use VM work requests for transmit");
+	}
+
 #ifdef TCP_OFFLOAD
 	if (vi->nofldrxq != 0) {
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
@@ -6920,6 +6941,63 @@ sysctl_noflowq(SYSCTL_HANDLER_ARGS)
 	else
 		vi->rsrv_noflowq = 0;
 
+	return (rc);
+}
+
+static int
+sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS)
+{
+	struct vi_info *vi = arg1;
+	struct adapter *sc = vi->adapter;
+	int rc, val, i;
+
+	MPASS(!(sc->flags & IS_VF));
+
+	val = vi->flags & TX_USES_VM_WR ? 1 : 0;
+	rc = sysctl_handle_int(oidp, &val, 0, req);
+	if (rc != 0 || req->newptr == NULL)
+		return (rc);
+
+	if (val != 0 && val != 1)
+		return (EINVAL);
+
+	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
+	    "t4txvm");
+	if (rc)
+		return (rc);
+	if (vi->ifp->if_drv_flags & IFF_DRV_RUNNING) {
+		/*
+		 * We don't want parse_pkt to run with one setting (VF or PF)
+		 * and then eth_tx to see a different setting but still use
+		 * stale information calculated by parse_pkt.
+		 */
+		rc = EBUSY;
+	} else {
+		struct port_info *pi = vi->pi;
+		struct sge_txq *txq;
+		uint32_t ctrl0;
+		uint8_t npkt = sc->params.max_pkts_per_eth_tx_pkts_wr;
+
+		if (val) {
+			vi->flags |= TX_USES_VM_WR;
+			vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
+			ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
+			    V_TXPKT_INTF(pi->tx_chan));
+			if (!(sc->flags & IS_VF))
+				npkt--;
+		} else {
+			vi->flags &= ~TX_USES_VM_WR;
+			vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
+			ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
+			    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
+			    V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
+		}
+		for_each_txq(vi, i, txq) {
+			txq->cpl_ctrl0 = ctrl0;
+			txq->txp.max_npkt = npkt;
+		}
+	}
+	end_synchronized_op(sc, LOCK_HELD);
 	return (rc);
 }
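
A quick sketch of the observable behavior of the new per-port sysctl, as
implemented in sysctl_tx_vm_wr() and vi_sysctls() above (names are
placeholders):

  # Setting it while the interface is up fails by design:
  sysctl dev.<port>.X.tx_vm_wr=1    # EBUSY while IFF_DRV_RUNNING

  # On a VF the node is read-only and always reports 1, because
  # cxgbev(4) always uses VM work requests:
  sysctl dev.<vfport>.X.tx_vm_wr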
 

Modified: stable/12/sys/dev/cxgbe/t4_sge.c
==============================================================================
--- stable/12/sys/dev/cxgbe/t4_sge.c	Mon Oct  5 08:51:03 2020	(r366442)
+++ stable/12/sys/dev/cxgbe/t4_sge.c	Mon Oct  5 09:03:17 2020	(r366443)
@@ -274,7 +274,7 @@ static void add_fl_to_sfl(struct adapter *, struct sge
 static inline void get_pkt_gl(struct mbuf *, struct sglist *);
 static inline u_int txpkt_len16(u_int, const u_int);
 static inline u_int txpkt_vm_len16(u_int, const u_int);
-static inline void calculate_mbuf_len16(struct adapter *, struct mbuf *);
+static inline void calculate_mbuf_len16(struct mbuf *, bool);
 static inline u_int txpkts0_len16(u_int);
 static inline u_int txpkts1_len16(void);
 static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int);
@@ -2229,6 +2229,7 @@ set_mbuf_len16(struct mbuf *m, uint8_t len16)
 {
 
 	M_ASSERTPKTHDR(m);
+	MPASS(len16 > 0 && len16 <= SGE_MAX_WR_LEN / 16);
 	m->m_pkthdr.PH_loc.eight[0] = len16;
 }
 
@@ -2569,9 +2570,15 @@ count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cf
  * The maximum number of segments that can fit in a WR.
  */
 static int
-max_nsegs_allowed(struct mbuf *m)
+max_nsegs_allowed(struct mbuf *m, bool vm_wr)
 {
 
+	if (vm_wr) {
+		if (needs_tso(m))
+			return (TX_SGL_SEGS_VM_TSO);
+		return (TX_SGL_SEGS_VM);
+	}
+
 	if (needs_tso(m)) {
 		if (needs_vxlan_tso(m))
 			return (TX_SGL_SEGS_VXLAN_TSO);
@@ -2588,7 +2595,7 @@ max_nsegs_allowed(struct mbuf *m)
  * b) it may get defragged up if the gather list is too long for the hardware.
  */
 int
-parse_pkt(struct adapter *sc, struct mbuf **mp)
+parse_pkt(struct mbuf **mp, bool vm_wr)
 {
 	struct mbuf *m0 = *mp, *m;
 	int rc, nsegs, defragged = 0, offset;
@@ -2617,7 +2624,7 @@ restart:
 	M_ASSERTPKTHDR(m0);
 	MPASS(m0->m_pkthdr.len > 0);
 	nsegs = count_mbuf_nsegs(m0, 0, &cflags);
-	if (nsegs > max_nsegs_allowed(m0)) {
+	if (nsegs > max_nsegs_allowed(m0, vm_wr)) {
 		if (defragged++ > 0) {
 			rc = EFBIG;
 			goto fail;
@@ -2645,7 +2652,7 @@ restart:
 	}
 	set_mbuf_nsegs(m0, nsegs);
 	set_mbuf_cflags(m0, cflags);
-	calculate_mbuf_len16(sc, m0);
+	calculate_mbuf_len16(m0, vm_wr);
 
 #ifdef RATELIMIT
 	/*
@@ -3053,7 +3060,7 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool
 
 		if (txp->npkt > 0 || remaining > 1 || txp->score > 3 ||
 		    atomic_load_int(&txq->eq.equiq) != 0) {
-			if (sc->flags & IS_VF)
+			if (vi->flags & TX_USES_VM_WR)
 				rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd);
 			else
 				rc = add_to_txpkts_pf(sc, txq, m0, avail, &snd);
@@ -3069,14 +3076,14 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool
 				if (txp->score++ >= 10)
 					txp->score = 10;
 				MPASS(avail >= tx_len16_to_desc(txp->len16));
-				if (sc->flags & IS_VF)
+				if (vi->flags & TX_USES_VM_WR)
 					n = write_txpkts_vm_wr(sc, txq);
 				else
 					n = write_txpkts_wr(sc, txq);
 			} else {
 				MPASS(avail >=
 				    tx_len16_to_desc(mbuf_len16(txp->mb[0])));
-				if (sc->flags & IS_VF)
+				if (vi->flags & TX_USES_VM_WR)
 					n = write_txpkt_vm_wr(sc, txq,
 					    txp->mb[0]);
 				else
@@ -3118,7 +3125,7 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool
 					break;	/* out of descriptors */
 			}
 			ETHER_BPF_MTAP(ifp, m0);
-			if (sc->flags & IS_VF)
+			if (vi->flags & TX_USES_VM_WR)
 				n = write_txpkt_vm_wr(sc, txq, m0);
 			else
 				n = write_txpkt_wr(sc, txq, m0, avail);
@@ -3161,14 +3168,14 @@ send_txpkts:
 			ETHER_BPF_MTAP(ifp, txp->mb[i]);
 		if (txp->npkt > 1) {
 			MPASS(avail >= tx_len16_to_desc(txp->len16));
-			if (sc->flags & IS_VF)
+			if (vi->flags & TX_USES_VM_WR)
 				n = write_txpkts_vm_wr(sc, txq);
 			else
 				n = write_txpkts_wr(sc, txq);
 		} else {
 			MPASS(avail >=
 			    tx_len16_to_desc(mbuf_len16(txp->mb[0])));
-			if (sc->flags & IS_VF)
+			if (vi->flags & TX_USES_VM_WR)
 				n = write_txpkt_vm_wr(sc, txq, txp->mb[0]);
 			else
 				n = write_txpkt_wr(sc, txq, txp->mb[0], avail);
@@ -4307,7 +4314,7 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int
 	TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq);
 	txq->ifp = vi->ifp;
 	txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK);
-	if (sc->flags & IS_VF)
+	if (vi->flags & TX_USES_VM_WR)
 		txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
 		    V_TXPKT_INTF(pi->tx_chan));
 	else
@@ -4323,6 +4330,8 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int
 	MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr);
 	txq->txp.max_npkt = min(nitems(txp->mb),
 	    sc->params.max_pkts_per_eth_tx_pkts_wr);
+	if (vi->flags & TX_USES_VM_WR && !(sc->flags & IS_VF))
+		txq->txp.max_npkt--;
 
 	snprintf(name, sizeof(name), "%d", idx);
 	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
@@ -4632,9 +4641,11 @@ get_pkt_gl(struct mbuf *m, struct sglist *gl)
 	KASSERT(gl->sg_nseg == mbuf_nsegs(m),
 	    ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m,
 	    mbuf_nsegs(m), gl->sg_nseg));
-	KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m),
+#if 0	/* vm_wr not readily available here. */
+	KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m, vm_wr),
 	    ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__,
-		gl->sg_nseg, max_nsegs_allowed(m)));
+		gl->sg_nseg, max_nsegs_allowed(m, vm_wr)));
+#endif
 }
 
 /*
@@ -4675,12 +4686,12 @@ txpkt_vm_len16(u_int nsegs, const u_int extra)
 }
 
 static inline void
-calculate_mbuf_len16(struct adapter *sc, struct mbuf *m)
+calculate_mbuf_len16(struct mbuf *m, bool vm_wr)
 {
 	const int lso = sizeof(struct cpl_tx_pkt_lso_core);
 	const int tnl_lso = sizeof(struct cpl_tx_tnl_lso);
 
-	if (sc->flags & IS_VF) {
+	if (vm_wr) {
 		if (needs_tso(m))
 			set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso));
 		else
@@ -5183,8 +5194,6 @@ add_to_txpkts_vf(struct adapter *sc, struct sge_txq *t
     int avail, bool *send)
 {
 	struct txpkts *txp = &txq->txp;
-
-	MPASS(sc->flags & IS_VF);
 
 	/* Cannot have TSO and coalesce at the same time. */
 	if (cannot_use_txpkts(m)) {

