svn commit: r311949 - head/sys/dev/cxgbe/tom

Navdeep Parhar np at FreeBSD.org
Wed Jan 11 23:48:19 UTC 2017


Author: np
Date: Wed Jan 11 23:48:17 2017
New Revision: 311949
URL: https://svnweb.freebsd.org/changeset/base/311949

Log:
  cxgbe/tom: Add VIMAGE support to the TOE driver.
  
  Active Open:
  - Save the socket's vnet at the time of the active open (t4_connect) and
    switch to it when processing the reply (do_act_open_rpl or
    do_act_establish).
  
  Passive Open:
  - Save the listening socket's vnet in the driver's listen_ctx and switch
    to it when processing incoming SYNs for the socket.
  - Reject SYNs that arrive on an ifnet that's not in the same vnet as the
    listening socket.
  
  CLIP (Compressed Local IPv6) table:
  - Add only those IPv6 addresses to the CLIP that are in a vnet
    associated with one of the card's ifnets.
  
  Misc:
  - Set vnet from the toepcb when processing TCP state transitions.
  - The kernel sets the vnet when calling the driver's output routine,
    so t4_push_frames already runs in the proper vnet context.  One
    exception is when incoming credits trigger tx within the driver's
    ithread.  Set the vnet explicitly in do_fw4_ack for that case.
  
  MFC after:	3 days
  Sponsored by:	Chelsio Communications

Modified:
  head/sys/dev/cxgbe/tom/t4_connect.c
  head/sys/dev/cxgbe/tom/t4_cpl_io.c
  head/sys/dev/cxgbe/tom/t4_ddp.c
  head/sys/dev/cxgbe/tom/t4_listen.c
  head/sys/dev/cxgbe/tom/t4_tom.c
  head/sys/dev/cxgbe/tom/t4_tom.h

Modified: head/sys/dev/cxgbe/tom/t4_connect.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_connect.c	Wed Jan 11 23:32:40 2017	(r311948)
+++ head/sys/dev/cxgbe/tom/t4_connect.c	Wed Jan 11 23:48:17 2017	(r311949)
@@ -126,6 +126,7 @@ do_act_establish(struct sge_iq *iq, cons
 	CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid);
 	free_atid(sc, atid);
 
+	CURVNET_SET(toep->vnet);
 	INP_WLOCK(inp);
 	toep->tid = tid;
 	insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 2 : 1);
@@ -141,6 +142,7 @@ do_act_establish(struct sge_iq *iq, cons
 	make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
 done:
 	INP_WUNLOCK(inp);
+	CURVNET_RESTORE();
 	return (0);
 }
 
@@ -178,6 +180,7 @@ act_open_failure_cleanup(struct adapter 
 	free_atid(sc, atid);
 	toep->tid = -1;
 
+	CURVNET_SET(toep->vnet);
 	if (status != EAGAIN)
 		INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
@@ -185,6 +188,7 @@ act_open_failure_cleanup(struct adapter 
 	final_cpl_received(toep);	/* unlocks inp */
 	if (status != EAGAIN)
 		INP_INFO_RUNLOCK(&V_tcbinfo);
+	CURVNET_RESTORE();
 }
 
 /*
@@ -360,6 +364,7 @@ t4_connect(struct toedev *tod, struct so
 	if (wr == NULL)
 		DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
 
+	toep->vnet = so->so_vnet;
 	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
 		set_tcpddp_ulp_mode(toep);
 	else

Modified: head/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_cpl_io.c	Wed Jan 11 23:32:40 2017	(r311948)
+++ head/sys/dev/cxgbe/tom/t4_cpl_io.c	Wed Jan 11 23:48:17 2017	(r311949)
@@ -306,7 +306,6 @@ make_established(struct toepcb *toep, ui
 	uint16_t tcpopt = be16toh(opt);
 	struct flowc_tx_params ftxp;
 
-	CURVNET_SET(so->so_vnet);
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(tp->t_state == TCPS_SYN_SENT ||
 	    tp->t_state == TCPS_SYN_RECEIVED,
@@ -357,7 +356,6 @@ make_established(struct toepcb *toep, ui
 	send_flowc_wr(toep, &ftxp);
 
 	soisconnected(so);
-	CURVNET_RESTORE();
 }
 
 static int
@@ -1146,6 +1144,7 @@ do_peer_close(struct sge_iq *iq, const s
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
+	CURVNET_SET(toep->vnet);
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
@@ -1191,6 +1190,7 @@ do_peer_close(struct sge_iq *iq, const s
 		tcp_twstart(tp);
 		INP_UNLOCK_ASSERT(inp);	 /* safe, we have a ref on the inp */
 		INP_INFO_RUNLOCK(&V_tcbinfo);
+		CURVNET_RESTORE();
 
 		INP_WLOCK(inp);
 		final_cpl_received(toep);
@@ -1203,6 +1203,7 @@ do_peer_close(struct sge_iq *iq, const s
 done:
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
+	CURVNET_RESTORE();
 	return (0);
 }
 
@@ -1229,6 +1230,7 @@ do_close_con_rpl(struct sge_iq *iq, cons
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
+	CURVNET_SET(toep->vnet);
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
@@ -1248,6 +1250,7 @@ do_close_con_rpl(struct sge_iq *iq, cons
 release:
 		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
 		INP_INFO_RUNLOCK(&V_tcbinfo);
+		CURVNET_RESTORE();
 
 		INP_WLOCK(inp);
 		final_cpl_received(toep);	/* no more CPLs expected */
@@ -1272,6 +1275,7 @@ release:
 done:
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
+	CURVNET_RESTORE();
 	return (0);
 }
 
@@ -1345,6 +1349,7 @@ do_abort_req(struct sge_iq *iq, const st
 	}
 
 	inp = toep->inp;
+	CURVNET_SET(toep->vnet);
 	INP_INFO_RLOCK(&V_tcbinfo);	/* for tcp_close */
 	INP_WLOCK(inp);
 
@@ -1380,6 +1385,7 @@ do_abort_req(struct sge_iq *iq, const st
 	final_cpl_received(toep);
 done:
 	INP_INFO_RUNLOCK(&V_tcbinfo);
+	CURVNET_RESTORE();
 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
 	return (0);
 }
@@ -1501,18 +1507,21 @@ do_rx_data(struct sge_iq *iq, const stru
 			DDP_UNLOCK(toep);
 		INP_WUNLOCK(inp);
 
+		CURVNET_SET(toep->vnet);
 		INP_INFO_RLOCK(&V_tcbinfo);
 		INP_WLOCK(inp);
 		tp = tcp_drop(tp, ECONNRESET);
 		if (tp)
 			INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
+		CURVNET_RESTORE();
 
 		return (0);
 	}
 
 	/* receive buffer autosize */
-	CURVNET_SET(so->so_vnet);
+	MPASS(toep->vnet == so->so_vnet);
+	CURVNET_SET(toep->vnet);
 	if (sb->sb_flags & SB_AUTOSIZE &&
 	    V_tcp_do_autorcvbuf &&
 	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
@@ -1713,10 +1722,12 @@ do_fw4_ack(struct sge_iq *iq, const stru
 		    tid);
 #endif
 		toep->flags &= ~TPF_TX_SUSPENDED;
+		CURVNET_SET(toep->vnet);
 		if (toep->ulp_mode == ULP_MODE_ISCSI)
 			t4_push_pdus(sc, toep, plen);
 		else
 			t4_push_frames(sc, toep, plen);
+		CURVNET_RESTORE();
 	} else if (plen > 0) {
 		struct sockbuf *sb = &so->so_snd;
 		int sbu;
@@ -2143,7 +2154,7 @@ t4_aiotx_task(void *context, int pending
 	struct socket *so = inp->inp_socket;
 	struct kaiocb *job;
 
-	CURVNET_SET(so->so_vnet);
+	CURVNET_SET(toep->vnet);
 	SOCKBUF_LOCK(&so->so_snd);
 	while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
 		job = TAILQ_FIRST(&toep->aiotx_jobq);

Modified: head/sys/dev/cxgbe/tom/t4_ddp.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_ddp.c	Wed Jan 11 23:32:40 2017	(r311948)
+++ head/sys/dev/cxgbe/tom/t4_ddp.c	Wed Jan 11 23:48:17 2017	(r311949)
@@ -546,7 +546,8 @@ handle_ddp_data(struct toepcb *toep, __b
 #endif
 
 	/* receive buffer autosize */
-	CURVNET_SET(so->so_vnet);
+	MPASS(toep->vnet == so->so_vnet);
+	CURVNET_SET(toep->vnet);
 	SOCKBUF_LOCK(sb);
 	if (sb->sb_flags & SB_AUTOSIZE &&
 	    V_tcp_do_autorcvbuf &&

Modified: head/sys/dev/cxgbe/tom/t4_listen.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_listen.c	Wed Jan 11 23:32:40 2017	(r311948)
+++ head/sys/dev/cxgbe/tom/t4_listen.c	Wed Jan 11 23:48:17 2017	(r311949)
@@ -222,6 +222,7 @@ alloc_lctx(struct adapter *sc, struct in
 	TAILQ_INIT(&lctx->synq);
 
 	lctx->inp = inp;
+	lctx->vnet = inp->inp_socket->so_vnet;
 	in_pcbref(inp);
 
 	return (lctx);
@@ -1200,6 +1201,8 @@ do_pass_accept_req(struct sge_iq *iq, co
 
 	pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
 
+	CURVNET_SET(lctx->vnet);
+
 	/*
 	 * Use the MAC index to lookup the associated VI.  If this SYN
 	 * didn't match a perfect MAC filter, punt.
@@ -1274,6 +1277,13 @@ found:
 		ntids = 1;
 	}
 
+	/*
+	 * Don't offload if the ifnet that the SYN came in on is not in the same
+	 * vnet as the listening socket.
+	 */
+	if (lctx->vnet != ifp->if_vnet)
+		REJECT_PASS_ACCEPT();
+
 	e = get_l2te_for_nexthop(pi, ifp, &inc);
 	if (e == NULL)
 		REJECT_PASS_ACCEPT();
@@ -1313,7 +1323,6 @@ found:
 		REJECT_PASS_ACCEPT();
 	}
 	so = inp->inp_socket;
-	CURVNET_SET(so->so_vnet);
 
 	mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
 	rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
@@ -1360,7 +1369,6 @@ found:
 	 */
 	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
 	INP_UNLOCK_ASSERT(inp);	/* ok to assert, we have a ref on the inp */
-	CURVNET_RESTORE();
 
 	/*
 	 * If we replied during syncache_add (synqe->wr has been consumed),
@@ -1415,10 +1423,12 @@ found:
 		return (__LINE__);
 	}
 	INP_WUNLOCK(inp);
+	CURVNET_RESTORE();
 
 	release_synqe(synqe);	/* extra hold */
 	return (0);
 reject:
+	CURVNET_RESTORE();
 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
 	    reject_reason);
 
@@ -1490,6 +1500,7 @@ do_pass_establish(struct sge_iq *iq, con
 	KASSERT(synqe->flags & TPF_SYNQE,
 	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
 
+	CURVNET_SET(lctx->vnet);
 	INP_INFO_RLOCK(&V_tcbinfo);	/* for syncache_expand */
 	INP_WLOCK(inp);
 
@@ -1507,6 +1518,7 @@ do_pass_establish(struct sge_iq *iq, con
 
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
+		CURVNET_RESTORE();
 		return (0);
 	}
 
@@ -1532,6 +1544,7 @@ reset:
 		send_reset_synqe(TOEDEV(ifp), synqe);
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
+		CURVNET_RESTORE();
 		return (0);
 	}
 	toep->tid = tid;
@@ -1568,6 +1581,8 @@ reset:
 	/* New connection inpcb is already locked by syncache_expand(). */
 	new_inp = sotoinpcb(so);
 	INP_WLOCK_ASSERT(new_inp);
+	MPASS(so->so_vnet == lctx->vnet);
+	toep->vnet = lctx->vnet;
 
 	/*
 	 * This is for the unlikely case where the syncache entry that we added
@@ -1591,6 +1606,7 @@ reset:
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
+	CURVNET_RESTORE();
 	release_synqe(synqe);
 
 	return (0);

Modified: head/sys/dev/cxgbe/tom/t4_tom.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_tom.c	Wed Jan 11 23:32:40 2017	(r311948)
+++ head/sys/dev/cxgbe/tom/t4_tom.c	Wed Jan 11 23:48:17 2017	(r311949)
@@ -799,74 +799,96 @@ update_clip_table(struct adapter *sc, st
 	struct in6_addr *lip, tlip;
 	struct clip_head stale;
 	struct clip_entry *ce, *ce_temp;
-	int rc, gen = atomic_load_acq_int(&in6_ifaddr_gen);
+	struct vi_info *vi;
+	int rc, gen, i, j;
+	uintptr_t last_vnet;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	mtx_lock(&td->clip_table_lock);
 
+	gen = atomic_load_acq_int(&in6_ifaddr_gen);
 	if (gen == td->clip_gen)
 		goto done;
 
 	TAILQ_INIT(&stale);
 	TAILQ_CONCAT(&stale, &td->clip_table, link);
 
-	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
-		lip = &ia->ia_addr.sin6_addr;
+	/*
+	 * last_vnet optimizes the common cases where all if_vnet = NULL (no
+	 * VIMAGE) or all if_vnet = vnet0.
+	 */
+	last_vnet = (uintptr_t)(-1);
+	for_each_port(sc, i)
+	for_each_vi(sc->port[i], j, vi) {
+		if (last_vnet == (uintptr_t)vi->ifp->if_vnet)
+			continue;
 
-		KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
-		    ("%s: mcast address in in6_ifaddr list", __func__));
+		/* XXX: races with if_vmove */
+		CURVNET_SET(vi->ifp->if_vnet);
+		TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+			lip = &ia->ia_addr.sin6_addr;
+
+			KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
+			    ("%s: mcast address in in6_ifaddr list", __func__));
+
+			if (IN6_IS_ADDR_LOOPBACK(lip))
+				continue;
+			if (IN6_IS_SCOPE_EMBED(lip)) {
+				/* Remove the embedded scope */
+				tlip = *lip;
+				lip = &tlip;
+				in6_clearscope(lip);
+			}
+			/*
+			 * XXX: how to weed out the link local address for the
+			 * loopback interface?  It's fe80::1 usually (always?).
+			 */
+
+			/*
+			 * If it's in the main list then we already know it's
+			 * not stale.
+			 */
+			TAILQ_FOREACH(ce, &td->clip_table, link) {
+				if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
+					goto next;
+			}
 
-		if (IN6_IS_ADDR_LOOPBACK(lip))
-			continue;
-		if (IN6_IS_SCOPE_EMBED(lip)) {
-			/* Remove the embedded scope */
-			tlip = *lip;
-			lip = &tlip;
-			in6_clearscope(lip);
-		}
-		/*
-		 * XXX: how to weed out the link local address for the loopback
-		 * interface?  It's fe80::1 usually (always?).
-		 */
-
-		/*
-		 * If it's in the main list then we already know it's not stale.
-		 */
-		TAILQ_FOREACH(ce, &td->clip_table, link) {
-			if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
-				goto next;
-		}
+			/*
+			 * If it's in the stale list we should move it to the
+			 * main list.
+			 */
+			TAILQ_FOREACH(ce, &stale, link) {
+				if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
+					TAILQ_REMOVE(&stale, ce, link);
+					TAILQ_INSERT_TAIL(&td->clip_table, ce,
+					    link);
+					goto next;
+				}
+			}
 
-		/*
-		 * If it's in the stale list we should move it to the main list.
-		 */
-		TAILQ_FOREACH(ce, &stale, link) {
-			if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
-				TAILQ_REMOVE(&stale, ce, link);
+			/* A new IP6 address; add it to the CLIP table */
+			ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
+			memcpy(&ce->lip, lip, sizeof(ce->lip));
+			ce->refcount = 0;
+			rc = add_lip(sc, lip);
+			if (rc == 0)
 				TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
-				goto next;
-			}
-		}
+			else {
+				char ip[INET6_ADDRSTRLEN];
 
-		/* A new IP6 address; add it to the CLIP table */
-		ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
-		memcpy(&ce->lip, lip, sizeof(ce->lip));
-		ce->refcount = 0;
-		rc = add_lip(sc, lip);
-		if (rc == 0)
-			TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
-		else {
-			char ip[INET6_ADDRSTRLEN];
-
-			inet_ntop(AF_INET6, &ce->lip, &ip[0], sizeof(ip));
-			log(LOG_ERR, "%s: could not add %s (%d)\n",
-			    __func__, ip, rc);
-			free(ce, M_CXGBE);
-		}
+				inet_ntop(AF_INET6, &ce->lip, &ip[0],
+				    sizeof(ip));
+				log(LOG_ERR, "%s: could not add %s (%d)\n",
+				    __func__, ip, rc);
+				free(ce, M_CXGBE);
+			}
 next:
-		continue;
+			continue;
+		}
+		CURVNET_RESTORE();
+		last_vnet = (uintptr_t)vi->ifp->if_vnet;
 	}
 
 	/*

Modified: head/sys/dev/cxgbe/tom/t4_tom.h
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_tom.h	Wed Jan 11 23:32:40 2017	(r311948)
+++ head/sys/dev/cxgbe/tom/t4_tom.h	Wed Jan 11 23:48:17 2017	(r311949)
@@ -141,6 +141,7 @@ struct toepcb {
 	int refcount;
 	struct tom_data *td;
 	struct inpcb *inp;	/* backpointer to host stack's PCB */
+	struct vnet *vnet;
 	struct vi_info *vi;	/* virtual interface */
 	struct sge_wrq *ofld_txq;
 	struct sge_ofld_rxq *ofld_rxq;
@@ -232,6 +233,7 @@ struct listen_ctx {
 	struct stid_region stid_region;
 	int flags;
 	struct inpcb *inp;		/* listening socket's inp */
+	struct vnet *vnet;
 	struct sge_wrq *ctrlq;
 	struct sge_ofld_rxq *ofld_rxq;
 	struct clip_entry *ce;


More information about the svn-src-head mailing list