svn commit: r334143 - head/sys/dev/cxgbe
Author: np
Date: Thu May 24 10:18:14 2018
New Revision: 334143
URL: https://svnweb.freebsd.org/changeset/base/334143
Log:
cxgbe(4): Data path for rate-limited tx.
This is hardware support for the SO_MAX_PACING_RATE sockopt (see
setsockopt(2)), which is available in kernels built with "options
RATELIMIT".
Relnotes: Yes
Sponsored by: Chelsio Communications
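
For readers unfamiliar with the sockopt, here is a minimal userland sketch of
how an application requests pacing (illustrative only, not part of this
commit; assumes a kernel built with "options RATELIMIT" and traffic leaving
via a cxgbe interface):

#include <sys/socket.h>

#include <err.h>
#include <stdint.h>

/*
 * Illustrative sketch: cap a connected TCP socket at ~1 MB/s.
 * SO_MAX_PACING_RATE takes the rate in bytes per second, which is
 * also the unit of the max_rate field in struct cxgbe_snd_tag below.
 */
static void
cap_tx_rate(int s)
{
	uint32_t rate = 1024 * 1024;	/* bytes/s */

	if (setsockopt(s, SOL_SOCKET, SO_MAX_PACING_RATE, &rate,
	    sizeof(rate)) == -1)
		err(1, "setsockopt(SO_MAX_PACING_RATE)");
}

The stack turns this into an if_snd_tag_alloc() call on the interface; the
driver-side tag allocation (cxgbe_snd_tag_alloc) already existed, and this
commit adds the transmit path that actually uses the tag.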
Modified:
head/sys/dev/cxgbe/adapter.h
head/sys/dev/cxgbe/offload.h
head/sys/dev/cxgbe/t4_main.c
head/sys/dev/cxgbe/t4_sched.c
head/sys/dev/cxgbe/t4_sge.c
Modified: head/sys/dev/cxgbe/adapter.h
==============================================================================
--- head/sys/dev/cxgbe/adapter.h Thu May 24 10:17:49 2018 (r334142)
+++ head/sys/dev/cxgbe/adapter.h Thu May 24 10:18:14 2018 (r334143)
@@ -1217,6 +1217,10 @@ void t4_register_an_handler(an_handler_t);
void t4_register_fw_msg_handler(int, fw_msg_handler_t);
void t4_register_cpl_handler(int, cpl_handler_t);
void t4_register_shared_cpl_handler(int, cpl_handler_t, int);
+#ifdef RATELIMIT
+int ethofld_transmit(struct ifnet *, struct mbuf *);
+void send_etid_flush_wr(struct cxgbe_snd_tag *);
+#endif
/* t4_tracer.c */
struct t4_tracer;
@@ -1239,11 +1243,13 @@ void t4_release_cl_rl_kbps(struct adapter *, int, int)
#ifdef RATELIMIT
void t4_init_etid_table(struct adapter *);
void t4_free_etid_table(struct adapter *);
+struct cxgbe_snd_tag *lookup_etid(struct adapter *, int);
int cxgbe_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *,
struct m_snd_tag **);
int cxgbe_snd_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *);
int cxgbe_snd_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *);
void cxgbe_snd_tag_free(struct m_snd_tag *);
+void cxgbe_snd_tag_free_locked(struct cxgbe_snd_tag *);
#endif
/* t4_filter.c */
Modified: head/sys/dev/cxgbe/offload.h
==============================================================================
--- head/sys/dev/cxgbe/offload.h Thu May 24 10:17:49 2018 (r334142)
+++ head/sys/dev/cxgbe/offload.h Thu May 24 10:18:14 2018 (r334143)
@@ -79,6 +79,14 @@ union aopen_entry {
union aopen_entry *next;
};
+/* cxgbe_snd_tag flags */
+enum {
+ EO_FLOWC_PENDING = (1 << 0), /* flowc needs to be sent */
+ EO_FLOWC_RPL_PENDING = (1 << 1), /* flowc credits due back */
+ EO_SND_TAG_REF = (1 << 2), /* kernel has a ref on us */
+ EO_FLUSH_RPL_PENDING = (1 << 3), /* credit flush rpl due back */
+};
+
struct cxgbe_snd_tag {
struct m_snd_tag com;
struct adapter *adapter;
@@ -86,13 +94,13 @@ struct cxgbe_snd_tag {
struct mtx lock;
int port_id;
int etid;
+ struct mbufq pending_tx, pending_fwack;
+ int plen;
struct sge_wrq *eo_txq;
+ uint32_t ctrl0;
uint16_t iqid;
int8_t schedcl;
uint64_t max_rate; /* in bytes/s */
- int8_t next_credits; /* need these many tx credits next */
- uint8_t next_nsegs; /* next WR will have these many GL segs total */
- uint8_t next_msegs; /* max segs for a single mbuf in next chain */
uint8_t tx_total; /* total tx WR credits (in 16B units) */
uint8_t tx_credits; /* tx WR credits (in 16B units) available */
uint8_t tx_nocompl; /* tx WR credits since last compl request */
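
A note on units: tx_total, tx_credits, and tx_nocompl above are denominated in
16-byte "len16" units, the native unit of firmware work-request lengths. A
minimal sketch of the conversion (illustrative only; the driver computes the
equivalent with howmany() in txpkt_eo_len16() later in this change):

#include <sys/param.h>	/* howmany() */

/* Bytes of work request -> len16 credits: one credit per started 16 bytes. */
static inline u_int
wr_len16(u_int len_bytes)
{
	return (howmany(len_bytes, 16));
}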
Modified: head/sys/dev/cxgbe/t4_main.c
==============================================================================
--- head/sys/dev/cxgbe/t4_main.c Thu May 24 10:17:49 2018 (r334142)
+++ head/sys/dev/cxgbe/t4_main.c Thu May 24 10:18:14 2018 (r334143)
@@ -1891,6 +1891,17 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */
return (rc);
}
+#ifdef RATELIMIT
+ if (m->m_pkthdr.snd_tag != NULL) {
+ /* EAGAIN tells the stack we are not the correct interface. */
+ if (__predict_false(ifp != m->m_pkthdr.snd_tag->ifp)) {
+ m_freem(m);
+ return (EAGAIN);
+ }
+
+ return (ethofld_transmit(ifp, m));
+ }
+#endif
/* Select a txq. */
txq = &sc->sge.txq[vi->first_txq];
Modified: head/sys/dev/cxgbe/t4_sched.c
==============================================================================
--- head/sys/dev/cxgbe/t4_sched.c Thu May 24 10:17:49 2018 (r334142)
+++ head/sys/dev/cxgbe/t4_sched.c Thu May 24 10:18:14 2018 (r334143)
@@ -529,7 +529,6 @@ alloc_etid(struct adapter *sc, struct cxgbe_snd_tag *c
return (etid);
}
-#ifdef notyet
struct cxgbe_snd_tag *
lookup_etid(struct adapter *sc, int etid)
{
@@ -537,7 +536,6 @@ lookup_etid(struct adapter *sc, int etid)
return (t->etid_tab[etid - t->etid_base].cst);
}
-#endif
static void
free_etid(struct adapter *sc, int etid)
@@ -585,14 +583,21 @@ failed:
}
mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
+ mbufq_init(&cst->pending_tx, INT_MAX);
+ mbufq_init(&cst->pending_fwack, INT_MAX);
cst->com.ifp = ifp;
+ cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
cst->adapter = sc;
cst->port_id = pi->port_id;
cst->schedcl = schedcl;
cst->max_rate = params->rate_limit.max_rate;
- cst->next_credits = -1;
cst->tx_credits = sc->params.ofldq_wr_cred;
cst->tx_total = cst->tx_credits;
+ cst->plen = 0;
+ cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
+ V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) |
+ V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) |
+ V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid)));
/*
* Queues will be selected later when the connection flowid is available.
@@ -616,6 +621,8 @@ cxgbe_snd_tag_modify(struct m_snd_tag *mst,
/* XXX: is schedcl -1 ok here? */
MPASS(cst->schedcl >= 0 && cst->schedcl < sc->chip_params->nsched_cls);
+ mtx_lock(&cst->lock);
+ MPASS(cst->flags & EO_SND_TAG_REF);
rc = t4_reserve_cl_rl_kbps(sc, cst->port_id,
(params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
if (rc != 0)
@@ -624,6 +631,7 @@ cxgbe_snd_tag_modify(struct m_snd_tag *mst,
t4_release_cl_rl_kbps(sc, cst->port_id, cst->schedcl);
cst->schedcl = schedcl;
cst->max_rate = params->rate_limit.max_rate;
+ mtx_unlock(&cst->lock);
return (0);
}
@@ -643,18 +651,53 @@ cxgbe_snd_tag_query(struct m_snd_tag *mst,
return (0);
}
+/*
+ * Unlocks cst and frees it.
+ */
void
-cxgbe_snd_tag_free(struct m_snd_tag *mst)
+cxgbe_snd_tag_free_locked(struct cxgbe_snd_tag *cst)
{
- struct cxgbe_snd_tag *cst = mst_to_cst(mst);
struct adapter *sc = cst->adapter;
+ mtx_assert(&cst->lock, MA_OWNED);
+ MPASS((cst->flags & EO_SND_TAG_REF) == 0);
+ MPASS(cst->tx_credits == cst->tx_total);
+ MPASS(cst->plen == 0);
+ MPASS(mbufq_first(&cst->pending_tx) == NULL);
+ MPASS(mbufq_first(&cst->pending_fwack) == NULL);
+
if (cst->etid >= 0)
free_etid(sc, cst->etid);
if (cst->schedcl != -1)
t4_release_cl_rl_kbps(sc, cst->port_id, cst->schedcl);
- if (mtx_initialized(&cst->lock))
- mtx_destroy(&cst->lock);
+ mtx_unlock(&cst->lock);
+ mtx_destroy(&cst->lock);
free(cst, M_CXGBE);
+}
+
+void
+cxgbe_snd_tag_free(struct m_snd_tag *mst)
+{
+ struct cxgbe_snd_tag *cst = mst_to_cst(mst);
+
+ mtx_lock(&cst->lock);
+
+ /* The kernel is done with the snd_tag. Remove its reference. */
+ MPASS(cst->flags & EO_SND_TAG_REF);
+ cst->flags &= ~EO_SND_TAG_REF;
+
+ if (cst->ncompl == 0) {
+ /*
+ * No fw4_ack in flight. Free the tag right away if there are
+ * no outstanding credits. Request the firmware to return all
+ * credits for the etid otherwise.
+ */
+ if (cst->tx_credits == cst->tx_total) {
+ cxgbe_snd_tag_free_locked(cst);
+ return; /* cst is gone. */
+ }
+ send_etid_flush_wr(cst);
+ }
+ mtx_unlock(&cst->lock);
}
#endif
Modified: head/sys/dev/cxgbe/t4_sge.c
==============================================================================
--- head/sys/dev/cxgbe/t4_sge.c Thu May 24 10:17:49 2018 (r334142)
+++ head/sys/dev/cxgbe/t4_sge.c Thu May 24 10:18:14 2018 (r334143)
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
+#include <netinet/udp.h>
#include <machine/in_cksum.h>
#include <machine/md_var.h>
#include <vm/vm.h>
@@ -153,7 +154,24 @@ TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx
static int safest_rx_cluster = PAGE_SIZE;
TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster);
+#ifdef RATELIMIT
/*
+ * Knob to control TCP timestamp rewriting, and the granularity of the tick used
+ * for rewriting. -1 and 0-3 are all valid values.
+ * -1: hardware should leave the TCP timestamps alone.
+ * 0: 1ms
+ * 1: 100us
+ * 2: 10us
+ * 3: 1us
+ */
+static int tsclk = -1;
+TUNABLE_INT("hw.cxgbe.tsclk", &tsclk);
+
+static int eo_max_backlog = 1024 * 1024;
+TUNABLE_INT("hw.cxgbe.eo_max_backlog", &eo_max_backlog);
+#endif
+
+/*
* The interrupt holdoff timers are multiplied by this value on T6+.
* 1 and 3-17 (both inclusive) are legal values.
*/
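
The two new knobs above are plain loader tunables in this revision
(TUNABLE_INT with no sysctl handler), so they must be set in the loader
environment before the driver initializes, e.g. in /boot/loader.conf (values
here are illustrative):

hw.cxgbe.tsclk="1"			# rewrite TCP timestamps, 100us tick
hw.cxgbe.eo_max_backlog="2097152"	# allow up to 2 MB of queued ethofld tx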
@@ -279,6 +297,11 @@ static void drain_wrq_wr_list(struct adapter *, struct
static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);
static int sysctl_tc(SYSCTL_HANDLER_ARGS);
+#ifdef RATELIMIT
+static inline u_int txpkt_eo_len16(u_int, u_int, u_int);
+static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *,
+ struct mbuf *);
+#endif
static counter_u64_t extfree_refs;
static counter_u64_t extfree_rels;
@@ -515,6 +538,10 @@ t4_sge_modload(void)
t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg);
t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx);
+#ifdef RATELIMIT
+ t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack,
+ CPL_COOKIE_ETHOFLD);
+#endif
t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl);
}
@@ -2078,7 +2105,68 @@ set_mbuf_len16(struct mbuf *m, uint8_t len16)
m->m_pkthdr.PH_loc.eight[0] = len16;
}
+#ifdef RATELIMIT
static inline int
+mbuf_eo_nsegs(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+ return (m->m_pkthdr.PH_loc.eight[1]);
+}
+
+static inline void
+set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs)
+{
+
+ M_ASSERTPKTHDR(m);
+ m->m_pkthdr.PH_loc.eight[1] = nsegs;
+}
+
+static inline int
+mbuf_eo_len16(struct mbuf *m)
+{
+ int n;
+
+ M_ASSERTPKTHDR(m);
+ n = m->m_pkthdr.PH_loc.eight[2];
+ MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16);
+
+ return (n);
+}
+
+static inline void
+set_mbuf_eo_len16(struct mbuf *m, uint8_t len16)
+{
+
+ M_ASSERTPKTHDR(m);
+ m->m_pkthdr.PH_loc.eight[2] = len16;
+}
+
+static inline int
+mbuf_eo_tsclk_tsoff(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+ return (m->m_pkthdr.PH_loc.eight[3]);
+}
+
+static inline void
+set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff)
+{
+
+ M_ASSERTPKTHDR(m);
+ m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff;
+}
+
+static inline int
+needs_eo(struct mbuf *m)
+{
+
+ return (m->m_pkthdr.snd_tag != NULL);
+}
+#endif
+
+static inline int
needs_tso(struct mbuf *m)
{
@@ -2107,6 +2195,22 @@ needs_l4_csum(struct mbuf *m)
}
static inline int
+needs_tcp_csum(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+ return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TCP_IPV6 | CSUM_TSO));
+}
+
+static inline int
+needs_udp_csum(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+ return (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6));
+}
+
+static inline int
needs_vlan_insertion(struct mbuf *m)
{
@@ -2142,16 +2246,19 @@ m_advance(struct mbuf **pm, int *poffset, int len)
/*
* Can deal with empty mbufs in the chain that have m_len = 0, but the chain
- * must have at least one mbuf that's not empty.
+ * must have at least one mbuf that's not empty. It is possible for this
+ * routine to return 0 if skip accounts for all the contents of the mbuf chain.
*/
static inline int
-count_mbuf_nsegs(struct mbuf *m)
+count_mbuf_nsegs(struct mbuf *m, int skip)
{
vm_paddr_t lastb, next;
vm_offset_t va;
int len, nsegs;
- MPASS(m != NULL);
+ M_ASSERTPKTHDR(m);
+ MPASS(m->m_pkthdr.len > 0);
+ MPASS(m->m_pkthdr.len >= skip);
nsegs = 0;
lastb = 0;
@@ -2160,15 +2267,20 @@ count_mbuf_nsegs(struct mbuf *m)
len = m->m_len;
if (__predict_false(len == 0))
continue;
- va = mtod(m, vm_offset_t);
+ if (skip >= len) {
+ skip -= len;
+ continue;
+ }
+ va = mtod(m, vm_offset_t) + skip;
+ len -= skip;
+ skip = 0;
next = pmap_kextract(va);
- nsegs += sglist_count(m->m_data, len);
+ nsegs += sglist_count((void *)(uintptr_t)va, len);
if (lastb + 1 == next)
nsegs--;
lastb = pmap_kextract(va + len - 1);
}
- MPASS(nsegs > 0);
return (nsegs);
}
@@ -2204,7 +2316,7 @@ restart:
*/
M_ASSERTPKTHDR(m0);
MPASS(m0->m_pkthdr.len > 0);
- nsegs = count_mbuf_nsegs(m0);
+ nsegs = count_mbuf_nsegs(m0, 0);
if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) {
if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) {
rc = EFBIG;
@@ -2230,7 +2342,20 @@ restart:
else
set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0)));
+#ifdef RATELIMIT
+ /*
+ * Ethofld is limited to TCP and UDP for now, and only when L4 hw
+ * checksumming is enabled. needs_l4_csum happens to check for all the
+ * right things.
+ */
+ if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0)))
+ m0->m_pkthdr.snd_tag = NULL;
+#endif
+
if (!needs_tso(m0) &&
+#ifdef RATELIMIT
+ !needs_eo(m0) &&
+#endif
!(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0))))
return (0);
@@ -2276,11 +2401,34 @@ restart:
}
#if defined(INET) || defined(INET6)
- if (needs_tso(m0)) {
+ if (needs_tcp_csum(m0)) {
tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen);
m0->m_pkthdr.l4hlen = tcp->th_off * 4;
+#ifdef RATELIMIT
+ if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) {
+ set_mbuf_eo_tsclk_tsoff(m0,
+ V_FW_ETH_TX_EO_WR_TSCLK(tsclk) |
+ V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1));
+ } else
+ set_mbuf_eo_tsclk_tsoff(m0, 0);
+	} else if (needs_udp_csum(m0)) {
+ m0->m_pkthdr.l4hlen = sizeof(struct udphdr);
+#endif
}
+#ifdef RATELIMIT
+ if (needs_eo(m0)) {
+ u_int immhdrs;
+
+ /* EO WRs have the headers in the WR and not the GL. */
+ immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen +
+ m0->m_pkthdr.l4hlen;
+ nsegs = count_mbuf_nsegs(m0, immhdrs);
+ set_mbuf_eo_nsegs(m0, nsegs);
+ set_mbuf_eo_len16(m0,
+ txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0)));
+ }
#endif
+#endif
MPASS(m0 == *mp);
return (0);
}
@@ -5302,3 +5450,416 @@ done:
mtx_unlock(&sc->tc_lock);
return (rc);
}
+
+#ifdef RATELIMIT
+/*
+ * len16 for a txpkt WR with a GL. Includes the firmware work request header.
+ */
+static inline u_int
+txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso)
+{
+ u_int n;
+
+ MPASS(immhdrs > 0);
+
+ n = roundup2(sizeof(struct fw_eth_tx_eo_wr) +
+ sizeof(struct cpl_tx_pkt_core) + immhdrs, 16);
+ if (__predict_false(nsegs == 0))
+ goto done;
+
+ nsegs--; /* first segment is part of ulptx_sgl */
+ n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
+ if (tso)
+ n += sizeof(struct cpl_tx_pkt_lso_core);
+
+done:
+ return (howmany(n, 16));
+}
+
+#define ETID_FLOWC_NPARAMS 6
+#define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \
+ ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16))
+#define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16))
+
+static int
+send_etid_flowc_wr(struct cxgbe_snd_tag *cst, struct port_info *pi,
+ struct vi_info *vi)
+{
+ struct wrq_cookie cookie;
+ u_int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;
+ struct fw_flowc_wr *flowc;
+
+ mtx_assert(&cst->lock, MA_OWNED);
+ MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) ==
+ EO_FLOWC_PENDING);
+
+ flowc = start_wrq_wr(cst->eo_txq, ETID_FLOWC_LEN16, &cookie);
+ if (__predict_false(flowc == NULL))
+ return (ENOMEM);
+
+ bzero(flowc, ETID_FLOWC_LEN);
+ flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
+ V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0));
+	flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLOWC_LEN16) |
+ V_FW_WR_FLOWID(cst->etid));
+ flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
+ flowc->mnemval[0].val = htobe32(pfvf);
+ flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
+ flowc->mnemval[1].val = htobe32(pi->tx_chan);
+ flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
+ flowc->mnemval[2].val = htobe32(pi->tx_chan);
+ flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
+ flowc->mnemval[3].val = htobe32(cst->iqid);
+ flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE;
+ flowc->mnemval[4].val = htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED);
+ flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
+ flowc->mnemval[5].val = htobe32(cst->schedcl);
+
+ commit_wrq_wr(cst->eo_txq, flowc, &cookie);
+
+ cst->flags &= ~EO_FLOWC_PENDING;
+ cst->flags |= EO_FLOWC_RPL_PENDING;
+ MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */
+ cst->tx_credits -= ETID_FLOWC_LEN16;
+
+ return (0);
+}
+
+#define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16))
+
+void
+send_etid_flush_wr(struct cxgbe_snd_tag *cst)
+{
+ struct fw_flowc_wr *flowc;
+ struct wrq_cookie cookie;
+
+ mtx_assert(&cst->lock, MA_OWNED);
+
+ flowc = start_wrq_wr(cst->eo_txq, ETID_FLUSH_LEN16, &cookie);
+ if (__predict_false(flowc == NULL))
+ CXGBE_UNIMPLEMENTED(__func__);
+
+ bzero(flowc, ETID_FLUSH_LEN16 * 16);
+ flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
+ V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL);
+ flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) |
+ V_FW_WR_FLOWID(cst->etid));
+
+ commit_wrq_wr(cst->eo_txq, flowc, &cookie);
+
+ cst->flags |= EO_FLUSH_RPL_PENDING;
+ MPASS(cst->tx_credits >= ETID_FLUSH_LEN16);
+ cst->tx_credits -= ETID_FLUSH_LEN16;
+ cst->ncompl++;
+}
+
+static void
+write_ethofld_wr(struct cxgbe_snd_tag *cst, struct fw_eth_tx_eo_wr *wr,
+ struct mbuf *m0, int compl)
+{
+ struct cpl_tx_pkt_core *cpl;
+ uint64_t ctrl1;
+ uint32_t ctrl; /* used in many unrelated places */
+ int len16, pktlen, nsegs, immhdrs;
+ uintptr_t p;
+ struct ulptx_sgl *usgl;
+ struct sglist sg;
+ struct sglist_seg segs[38]; /* XXX: find real limit. XXX: get off the stack */
+
+ mtx_assert(&cst->lock, MA_OWNED);
+ M_ASSERTPKTHDR(m0);
+ KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
+ m0->m_pkthdr.l4hlen > 0,
+ ("%s: ethofld mbuf %p is missing header lengths", __func__, m0));
+
+ if (needs_udp_csum(m0)) {
+ CXGBE_UNIMPLEMENTED("UDP ethofld");
+ }
+
+ len16 = mbuf_eo_len16(m0);
+ nsegs = mbuf_eo_nsegs(m0);
+ pktlen = m0->m_pkthdr.len;
+ ctrl = sizeof(struct cpl_tx_pkt_core);
+ if (needs_tso(m0))
+ ctrl += sizeof(struct cpl_tx_pkt_lso_core);
+ immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen;
+ ctrl += immhdrs;
+
+ wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) |
+ V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl));
+ wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) |
+ V_FW_WR_FLOWID(cst->etid));
+ wr->r3 = 0;
+ wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
+ wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen;
+ wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
+ wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen;
+ wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0);
+ wr->u.tcpseg.r4 = 0;
+ wr->u.tcpseg.r5 = 0;
+ wr->u.tcpseg.plen = htobe32(pktlen - immhdrs);
+
+ if (needs_tso(m0)) {
+ struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
+
+ wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz);
+
+ ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
+ F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2)
+ | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
+ if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header))
+ ctrl |= V_LSO_ETHHDR_LEN(1);
+ if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
+ ctrl |= F_LSO_IPV6;
+ lso->lso_ctrl = htobe32(ctrl);
+ lso->ipid_ofst = htobe16(0);
+ lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
+ lso->seqno_offset = htobe32(0);
+ lso->len = htobe32(pktlen);
+
+ cpl = (void *)(lso + 1);
+ } else {
+ wr->u.tcpseg.mss = htobe16(0xffff);
+ cpl = (void *)(wr + 1);
+ }
+
+ /* Checksum offload must be requested for ethofld. */
+ ctrl1 = 0;
+ MPASS(needs_l4_csum(m0));
+
+ /* VLAN tag insertion */
+ if (needs_vlan_insertion(m0)) {
+ ctrl1 |= F_TXPKT_VLAN_VLD |
+ V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
+ }
+
+ /* CPL header */
+ cpl->ctrl0 = cst->ctrl0;
+ cpl->pack = 0;
+ cpl->len = htobe16(pktlen);
+ cpl->ctrl1 = htobe64(ctrl1);
+
+ /* Copy Ethernet, IP & TCP hdrs as immediate data */
+ p = (uintptr_t)(cpl + 1);
+ m_copydata(m0, 0, immhdrs, (void *)p);
+
+ /* SGL */
+ if (nsegs > 0) {
+ int i, pad;
+
+		/* zero-pad up to the next 16-byte boundary, if not 16-byte aligned */
+ p += immhdrs;
+ pad = 16 - (immhdrs & 0xf);
+ bzero((void *)p, pad);
+
+ usgl = (void *)(p + pad);
+ usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
+ V_ULPTX_NSGE(nsegs));
+
+ sglist_init(&sg, nitems(segs), segs);
+ for (; m0 != NULL; m0 = m0->m_next) {
+ if (__predict_false(m0->m_len == 0))
+ continue;
+ if (immhdrs >= m0->m_len) {
+ immhdrs -= m0->m_len;
+ continue;
+ }
+
+ sglist_append(&sg, mtod(m0, char *) + immhdrs,
+ m0->m_len - immhdrs);
+ immhdrs = 0;
+ }
+ MPASS(sg.sg_nseg == nsegs);
+
+ /*
+ * Zero pad last 8B in case the WR doesn't end on a 16B
+ * boundary.
+ */
+ *(uint64_t *)((char *)wr + len16 * 16 - 8) = 0;
+
+ usgl->len0 = htobe32(segs[0].ss_len);
+ usgl->addr0 = htobe64(segs[0].ss_paddr);
+ for (i = 0; i < nsegs - 1; i++) {
+ usgl->sge[i / 2].len[i & 1] = htobe32(segs[i + 1].ss_len);
+ usgl->sge[i / 2].addr[i & 1] = htobe64(segs[i + 1].ss_paddr);
+ }
+ if (i & 1)
+ usgl->sge[i / 2].len[1] = htobe32(0);
+ }
+
+}
+
+static void
+ethofld_tx(struct cxgbe_snd_tag *cst)
+{
+ struct mbuf *m;
+ struct wrq_cookie cookie;
+ int next_credits, compl;
+ struct fw_eth_tx_eo_wr *wr;
+
+ mtx_assert(&cst->lock, MA_OWNED);
+
+ while ((m = mbufq_first(&cst->pending_tx)) != NULL) {
+ M_ASSERTPKTHDR(m);
+
+		/* How many len16 credits do we need to send this mbuf? */
+ next_credits = mbuf_eo_len16(m);
+ MPASS(next_credits > 0);
+ if (next_credits > cst->tx_credits) {
+ /*
+ * Tx will make progress eventually because there is at
+ * least one outstanding fw4_ack that will return
+ * credits and kick the tx.
+ */
+ MPASS(cst->ncompl > 0);
+ return;
+ }
+ wr = start_wrq_wr(cst->eo_txq, next_credits, &cookie);
+ if (__predict_false(wr == NULL)) {
+ /* XXX: wishful thinking, not a real assertion. */
+ MPASS(cst->ncompl > 0);
+ return;
+ }
+ cst->tx_credits -= next_credits;
+ cst->tx_nocompl += next_credits;
+ compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2;
+ ETHER_BPF_MTAP(cst->com.ifp, m);
+ write_ethofld_wr(cst, wr, m, compl);
+ commit_wrq_wr(cst->eo_txq, wr, &cookie);
+ if (compl) {
+ cst->ncompl++;
+ cst->tx_nocompl = 0;
+ }
+ (void) mbufq_dequeue(&cst->pending_tx);
+ mbufq_enqueue(&cst->pending_fwack, m);
+ }
+}
+
+int
+ethofld_transmit(struct ifnet *ifp, struct mbuf *m0)
+{
+ struct cxgbe_snd_tag *cst;
+ int rc;
+
+ MPASS(m0->m_nextpkt == NULL);
+ MPASS(m0->m_pkthdr.snd_tag != NULL);
+ cst = mst_to_cst(m0->m_pkthdr.snd_tag);
+
+ mtx_lock(&cst->lock);
+ MPASS(cst->flags & EO_SND_TAG_REF);
+
+ if (__predict_false(cst->flags & EO_FLOWC_PENDING)) {
+ struct vi_info *vi = ifp->if_softc;
+ struct port_info *pi = vi->pi;
+ struct adapter *sc = pi->adapter;
+ const uint32_t rss_mask = vi->rss_size - 1;
+ uint32_t rss_hash;
+
+ cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq];
+ if (M_HASHTYPE_ISHASH(m0))
+ rss_hash = m0->m_pkthdr.flowid;
+ else
+ rss_hash = arc4random();
+ /* We assume RSS hashing */
+ cst->iqid = vi->rss[rss_hash & rss_mask];
+ cst->eo_txq += rss_hash % vi->nofldtxq;
+ rc = send_etid_flowc_wr(cst, pi, vi);
+ if (rc != 0)
+ goto done;
+ }
+
+ if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) {
+ rc = ENOBUFS;
+ goto done;
+ }
+
+ mbufq_enqueue(&cst->pending_tx, m0);
+ cst->plen += m0->m_pkthdr.len;
+
+ ethofld_tx(cst);
+ rc = 0;
+done:
+ mtx_unlock(&cst->lock);
+ if (__predict_false(rc != 0))
+ m_freem(m0);
+ return (rc);
+}
+
+static int
+ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
+{
+ struct adapter *sc = iq->adapter;
+ const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
+ struct mbuf *m;
+ u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
+ struct cxgbe_snd_tag *cst;
+ uint8_t credits = cpl->credits;
+
+ cst = lookup_etid(sc, etid);
+ mtx_lock(&cst->lock);
+ if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) {
+ MPASS(credits >= ETID_FLOWC_LEN16);
+ credits -= ETID_FLOWC_LEN16;
+ cst->flags &= ~EO_FLOWC_RPL_PENDING;
+ }
+
+ KASSERT(cst->ncompl > 0,
+ ("%s: etid %u (%p) wasn't expecting completion.",
+ __func__, etid, cst));
+ cst->ncompl--;
+
+ while (credits > 0) {
+ m = mbufq_dequeue(&cst->pending_fwack);
+ if (__predict_false(m == NULL)) {
+ /*
+ * The remaining credits are for the final flush that
+ * was issued when the tag was freed by the kernel.
+ */
+ MPASS((cst->flags &
+ (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) ==
+ EO_FLUSH_RPL_PENDING);
+ MPASS(credits == ETID_FLUSH_LEN16);
+ MPASS(cst->tx_credits + cpl->credits == cst->tx_total);
+ MPASS(cst->ncompl == 0);
+
+ cst->flags &= ~EO_FLUSH_RPL_PENDING;
+ cst->tx_credits += cpl->credits;
+freetag:
+ cxgbe_snd_tag_free_locked(cst);
+ return (0); /* cst is gone. */
+ }
+ KASSERT(m != NULL,
+ ("%s: too many credits (%u, %u)", __func__, cpl->credits,
+ credits));
+ KASSERT(credits >= mbuf_eo_len16(m),
+ ("%s: too few credits (%u, %u, %u)", __func__,
+ cpl->credits, credits, mbuf_eo_len16(m)));
+ credits -= mbuf_eo_len16(m);
+ cst->plen -= m->m_pkthdr.len;
+ m_freem(m);
+ }
+
+ cst->tx_credits += cpl->credits;
+ MPASS(cst->tx_credits <= cst->tx_total);
+
+ m = mbufq_first(&cst->pending_tx);
+ if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
+ ethofld_tx(cst);
+
+ if (__predict_false((cst->flags & EO_SND_TAG_REF) == 0) &&
+ cst->ncompl == 0) {
+ if (cst->tx_credits == cst->tx_total)
+ goto freetag;
+ else {
+ MPASS((cst->flags & EO_FLUSH_RPL_PENDING) == 0);
+ send_etid_flush_wr(cst);
+ }
+ }
+
+ mtx_unlock(&cst->lock);
+
+ return (0);
+}
+#endif
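
To make the SGL sizing in txpkt_eo_len16() above concrete, here is a
standalone restatement that compiles in userland (illustrative only; the 16-
and 24-byte constants correspond to sizeof(struct ulptx_sgl) and
sizeof(struct ulptx_sge_pair), with the first segment carried in the ulptx_sgl
header itself and subsequent segments packed two per pair):

#include <stdio.h>

/* Bytes consumed by a ULP_TX scatter-gather list with nsegs entries. */
static unsigned int
ulptx_sgl_bytes(unsigned int nsegs)
{
	if (nsegs == 0)
		return (0);
	nsegs--;		/* first segment is part of ulptx_sgl */
	/* Each pair of extra segments takes 24 bytes; an odd one pads to 16. */
	return (16 + 8 * ((3 * nsegs) / 2 + (nsegs & 1)));
}

int
main(void)
{
	/* nsegs=1 -> 16, 2 -> 32, 3 -> 40, 4 -> 56, 5 -> 64 bytes. */
	for (unsigned int n = 1; n <= 5; n++)
		printf("nsegs=%u -> %u bytes\n", n, ulptx_sgl_bytes(n));
	return (0);
}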