svn commit: r243680 - head/sys/dev/cxgbe/tom

Navdeep Parhar np at FreeBSD.org
Thu Nov 29 19:10:05 UTC 2012


Author: np
Date: Thu Nov 29 19:10:04 2012
New Revision: 243680
URL: http://svnweb.freebsd.org/changeset/base/243680

Log:
  cxgbe/tom: Add a flag to indicate that the L2 table entry for an
  embryonic connection has been setup and never attempt to abort a tid
  before this is done.  This fixes a bad race where a listening socket is
  closed when the driver is in the middle of step (b) here.  The symptom
  of this were "ARP miss" errors from the driver followed by tid leaks.
  
  A hardware-offloaded passive open works this way:
  
  a) A SYN "hits" the TCAM entry for a server tid and the chip delivers it
  to the queue associated with the server tid (say, queue A).  It waits
  for a response from the driver telling it what to do.
  
  b) The driver decides it is ok to proceed.  It adds the new tid to the
  list of embryonic connections associated with the server tid and then
  hands off the SYN to the kernel's syncache to make sure that the kernel
  okays it too.  If it does then the driver provides an L2 table entry,
  queue id (say, queue B), etc. and instructs the chip to send the SYN/ACK
  response.
  
  c) The chip delivers a status to queue B depending on how the third step
  of the 3-way handshake goes.  The driver removes the tid from its list
  of embryonic connections and either expands the syncache entry or
  destroys the tid.  In any case all subsequent messages for the new tid
  will be delivered to queue B, not queue A.  Anything running in queue B
  knows that the L2 entry has long been setup and the new flag is of no
  interest from here on.  If the listener is closed it will deal with
  so_comp as normal.
  
  MFC after:	1 week

Modified:
  head/sys/dev/cxgbe/tom/t4_cpl_io.c
  head/sys/dev/cxgbe/tom/t4_listen.c
  head/sys/dev/cxgbe/tom/t4_tom.h

Modified: head/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_cpl_io.c	Thu Nov 29 18:23:21 2012	(r243679)
+++ head/sys/dev/cxgbe/tom/t4_cpl_io.c	Thu Nov 29 19:10:04 2012	(r243680)
@@ -786,6 +786,29 @@ do_peer_close(struct sge_iq *iq, const s
 	KASSERT(opcode == CPL_PEER_CLOSE,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
+
+	if (__predict_false(toep->flags & TPF_SYNQE)) {
+#ifdef INVARIANTS
+		struct synq_entry *synqe = (void *)toep;
+
+		INP_WLOCK(synqe->lctx->inp);
+		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
+			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
+			    ("%s: listen socket closed but tid %u not aborted.",
+			    __func__, tid));
+		} else {
+			/*
+			 * do_pass_accept_req is still running and will
+			 * eventually take care of this tid.
+			 */
+		}
+		INP_WUNLOCK(synqe->lctx->inp);
+#endif
+		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
+		    toep, toep->flags);
+		return (0);
+	}
+
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	INP_INFO_WLOCK(&V_tcbinfo);
@@ -1092,13 +1115,24 @@ do_rx_data(struct sge_iq *iq, const stru
 	int len;
 
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
-		/*
-		 * do_pass_establish failed and must be attempting to abort the
-		 * synqe's tid.  Meanwhile, the T4 has sent us data for such a
-		 * connection.
-		 */
-		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
-		    ("%s: synqe and tid isn't being aborted.", __func__));
+#ifdef INVARIANTS
+		struct synq_entry *synqe = (void *)toep;
+
+		INP_WLOCK(synqe->lctx->inp);
+		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
+			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
+			    ("%s: listen socket closed but tid %u not aborted.",
+			    __func__, tid));
+		} else {
+			/*
+			 * do_pass_accept_req is still running and will
+			 * eventually take care of this tid.
+			 */
+		}
+		INP_WUNLOCK(synqe->lctx->inp);
+#endif
+		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
+		    toep, toep->flags);
 		m_freem(m);
 		return (0);
 	}

Modified: head/sys/dev/cxgbe/tom/t4_listen.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_listen.c	Thu Nov 29 18:23:21 2012	(r243679)
+++ head/sys/dev/cxgbe/tom/t4_listen.c	Thu Nov 29 19:10:04 2012	(r243680)
@@ -282,8 +282,8 @@ send_reset_synqe(struct toedev *tod, str
 
 	INP_WLOCK_ASSERT(synqe->lctx->inp);
 
-	CTR4(KTR_CXGBE, "%s: synqe %p, tid %d%s",
-	    __func__, synqe, synqe->tid,
+	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
+	    __func__, synqe, synqe->flags, synqe->tid,
 	    synqe->flags & TPF_ABORT_SHUTDOWN ?
 	    " (abort already in progress)" : "");
 	if (synqe->flags & TPF_ABORT_SHUTDOWN)
@@ -501,8 +501,10 @@ t4_listen_stop(struct toedev *tod, struc
 	 * socket's so_comp.  It doesn't know about the connections on the synq
 	 * so we need to take care of those.
 	 */
-	TAILQ_FOREACH(synqe, &lctx->synq, link)
-		send_reset_synqe(tod, synqe);
+	TAILQ_FOREACH(synqe, &lctx->synq, link) {
+		if (synqe->flags & TPF_SYNQE_HAS_L2TE)
+			send_reset_synqe(tod, synqe);
+	}
 
 	destroy_server(sc, lctx);
 	return (0);
@@ -1131,8 +1133,7 @@ do_pass_accept_req(struct sge_iq *iq, co
 	synqe->lctx = lctx;
 	synqe->syn = m;
 	m = NULL;
-	refcount_init(&synqe->refcnt, 1); /* 1 so that it is held for the
-					     duration of this function */
+	refcount_init(&synqe->refcnt, 0);
 	synqe->l2e_idx = e->idx;
 	synqe->rcv_bufsize = rx_credits;
 	atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);
@@ -1155,46 +1156,9 @@ do_pass_accept_req(struct sge_iq *iq, co
 	 * If we replied during syncache_add (synqe->wr has been consumed),
 	 * good.  Otherwise, set it to 0 so that further syncache_respond
 	 * attempts by the kernel will be ignored.
-	 *
-	 * The extra hold on the synqe makes sure that it is still around, even
-	 * if the listener has been dropped and the synqe was aborted and the
-	 * reply to the abort has removed and released the synqe from the synq
-	 * list.
 	 */
 	if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {
 
-		INP_WLOCK(inp);
-		if (__predict_false(inp->inp_flags & INP_DROPPED)) {
-			/* listener closed.  synqe must have been aborted. */
-			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
-			    ("%s: listener %p closed but synqe %p not aborted",
-			    __func__, inp, synqe));
-
-			CTR5(KTR_CXGBE,
-			    "%s: stid %u, tid %u, lctx %p, synqe %p, ABORTED",
-			    __func__, stid, tid, lctx, synqe);
-			INP_WUNLOCK(inp);
-			free(wr, M_CXGBE);
-			release_synqe(synqe);	/* about to exit function */
-			return (__LINE__);
-		}
-
-		/*
-		 * synqe aborted before TOM replied to PASS_ACCEPT_REQ.  But
-		 * that can only happen if the listener was closed and we just
-		 * checked for that.
-		 */
-		KASSERT(!(synqe->flags & TPF_ABORT_SHUTDOWN),
-		    ("%s: synqe %p aborted, but listener %p not dropped.",
-		    __func__, synqe, inp));
-
-		/* Yank the synqe out of the lctx synq. */
-		TAILQ_REMOVE(&lctx->synq, synqe, link);
-		release_synqe(synqe);	/* removed from synq list */
-		inp = release_lctx(sc, lctx);
-		if (inp)
-			INP_WUNLOCK(inp);
-
 		/*
 		 * syncache may or may not have a hold on the synqe, which may
 		 * or may not be stashed in the original SYN mbuf passed to us.
@@ -1205,13 +1169,39 @@ do_pass_accept_req(struct sge_iq *iq, co
 			m->m_pkthdr.rcvif = ifp;
 
 		remove_tid(sc, synqe->tid);
-		release_synqe(synqe);	/* about to exit function */
 		free(wr, M_CXGBE);
+
+		/* Yank the synqe out of the lctx synq. */
+		INP_WLOCK(inp);
+		TAILQ_REMOVE(&lctx->synq, synqe, link);
+		release_synqe(synqe);	/* removed from synq list */
+		inp = release_lctx(sc, lctx);
+		if (inp)
+			INP_WUNLOCK(inp);
+
 		REJECT_PASS_ACCEPT();
 	}
-	release_synqe(synqe);	/* about to exit function */
+
 	CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
 	    __func__, stid, tid, lctx, synqe);
+
+	INP_WLOCK(inp);
+	synqe->flags |= TPF_SYNQE_HAS_L2TE;
+	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
+		/*
+		 * Listening socket closed but tod_listen_stop did not abort
+		 * this tid because there was no L2T entry for the tid at that
+		 * time.  Abort it now.  The reply to the abort will clean up.
+		 */
+		CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, ABORT",
+		    __func__, stid, tid, lctx, synqe);
+		send_reset_synqe(tod, synqe);
+		INP_WUNLOCK(inp);
+
+		return (__LINE__);
+	}
+	INP_WUNLOCK(inp);
+
 	return (0);
 reject:
 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
@@ -1293,15 +1283,12 @@ do_pass_establish(struct sge_iq *iq, con
 	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
 
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
-		/*
-		 * The listening socket has closed.  The TOM must have aborted
-		 * all the embryonic connections (including this one) that were
-		 * on the lctx's synq.  do_abort_rpl for the tid is responsible
-		 * for cleaning up.
-		 */
-		KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
-		    ("%s: listen socket dropped but tid %u not aborted.",
-		    __func__, tid));
+
+		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
+			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
+			    ("%s: listen socket closed but tid %u not aborted.",
+			    __func__, tid));
+		}
 
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -1321,7 +1308,12 @@ do_pass_establish(struct sge_iq *iq, con
 	toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT);
 	if (toep == NULL) {
 reset:
-		/* The reply to this abort will perform final cleanup */
+		/*
+		 * The reply to this abort will perform final cleanup.  There is
+		 * no need to check for HAS_L2TE here.  We can be here only if
+		 * we responded to the PASS_ACCEPT_REQ, and our response had the
+		 * L2T idx.
+		 */
 		send_reset_synqe(TOEDEV(ifp), synqe);
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&V_tcbinfo);

Modified: head/sys/dev/cxgbe/tom/t4_tom.h
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_tom.h	Thu Nov 29 18:23:21 2012	(r243679)
+++ head/sys/dev/cxgbe/tom/t4_tom.h	Thu Nov 29 19:10:04 2012	(r243680)
@@ -67,6 +67,7 @@ enum {
 	TPF_SYNQE_NEEDFREE = (1 << 9),	/* synq_entry was malloc'd separately */
 	TPF_SYNQE_TCPDDP   = (1 << 10),	/* ulp_mode TCPDDP in toepcb */
 	TPF_SYNQE_EXPANDED = (1 << 11),	/* toepcb ready, tid context updated */
+	TPF_SYNQE_HAS_L2TE = (1 << 12),	/* we've replied to PASS_ACCEPT_REQ */
 };
 
 enum {


More information about the svn-src-all mailing list