PERFORCE change 44208 for review

Sam Leffler sam at FreeBSD.org
Mon Dec 22 16:45:13 PST 2003


http://perforce.freebsd.org/chv.cgi?CH=44208

Change 44208 by sam at sam_ebb on 2003/12/22 16:44:11

	Redo soreceive's handling of MT_CONTROL mbufs to eliminate a LOR
	between so_rcv and the file descriptor lock.  Instead of calling
	the externalize method as each MT_CONTROL mbuf is identified, hold
	them until after the sockbuf lock has been dropped and then process
	them.  This potentially changes the semantics of soreceive when
	errors occur; must monitor the effect of this change carefully.
	
	While here fix some bugs and cleanup some code:
	
	o handle sodupsockaddr failing; return ENOMEM if it happens
	o handle m_copy (aka m_copym) failing; return ENOBUFS if it happens
	o optimize the handling of controlp based on the knowledged that
	  MT_CONTROL mbufs come one at a time
	o simplify MT_CONTROL processing by using if { do { } while } and
	  hoisting code out of the loop
	o add a KASSERT to validate an assumption about error that's used
	  to optimize some subsequent code
	o note that the goto restart case at the bottom of soreceive
	  causes msg recv statistics to be wrong
	
	Open issue: it's unclear what to do if the externalize returns an
	error after retrieving data from the socket.  This is likely to
	confuse applications.

Affected files ...

.. //depot/projects/netperf+sockets/sys/kern/uipc_socket.c#8 edit

Differences ...

==== //depot/projects/netperf+sockets/sys/kern/uipc_socket.c#8 (text+ko) ====

@@ -775,14 +775,19 @@
 	int flags, len, error, offset;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
+	struct mbuf *cm, **cme;
 	int moff, type = 0;
 	int orig_resid = uio->uio_resid;
 
 	mp = mp0;
 	if (psa)
 		*psa = 0;
-	if (controlp)
+	cm = NULL;
+	if (controlp) {
 		*controlp = 0;
+		cme = &cm;
+	} else
+		cme = &cm;		/* XXX to silence gcc */
 	if (flagsp)
 		flags = *flagsp &~ MSG_EOR;
 	else
@@ -893,6 +898,7 @@
 		goto restart;
 	}
 dontblock:
+	KASSERT(error == 0, ("unexpected state, error %u", error));
 	if (uio->uio_td)
 		uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
 	SBLASTRECORDCHK(&so->so_rcv);
@@ -901,10 +907,14 @@
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
-		orig_resid = 0;
-		if (psa)
+		if (psa) {
 			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
 					M_NOWAIT);	/* XXX */
+			if (*psa == NULL) {
+				error = ENOMEM;
+				goto release;
+			}
+		}
 		if (flags & MSG_PEEK) {
 			m = m->m_next;
 		} else {
@@ -912,30 +922,39 @@
 			so->so_rcv.sb_mb = m_free(m);
 			m = so->so_rcv.sb_mb;
 		}
+		orig_resid = 0;
 	}
-	while (m && m->m_type == MT_CONTROL && error == 0) {
-		if (flags & MSG_PEEK) {
-			if (controlp)
-				*controlp = m_copy(m, 0, m->m_len);
-			m = m->m_next;
-		} else {
-			sbfree(&so->so_rcv, m);
-			so->so_rcv.sb_mb = m->m_next;
-			m->m_next = NULL;
-			if (pr->pr_domain->dom_externalize)
-				error =
-				(*pr->pr_domain->dom_externalize)(m, controlp);
-			else if (controlp)
-				*controlp = m;
-			else
-				m_freem(m);
-			m = so->so_rcv.sb_mb;
-		}
-		if (controlp) {
-			orig_resid = 0;
-			while (*controlp != NULL)
-				controlp = &(*controlp)->m_next;
-		}
+	if (m && m->m_type == MT_CONTROL) {
+		do {
+			if (flags & MSG_PEEK) {
+				if (controlp) {
+					*controlp = m_copy(m, 0, m->m_len);
+					if (*controlp == NULL) {
+						error = ENOBUFS;
+						goto release;
+					}
+					controlp = &(*controlp)->m_next;
+				}
+				m = m->m_next;
+			} else {
+				sbfree(&so->so_rcv, m);
+				so->so_rcv.sb_mb = m->m_next;
+				m->m_next = NULL;
+				if (controlp) {
+					/*
+					 * Link mbufs together for processing
+					 * below.  See the comments there for
+					 * an explanation of why we delay the
+					 * work.
+					 */
+					*cme = m;
+					cme = &(*cme)->m_next;
+				} else
+					m_free(m);
+				m = so->so_rcv.sb_mb;
+			}
+		} while (m && m->m_type == MT_CONTROL);
+		orig_resid = 0;
 	}
 	if (m) {
 		if ((flags & MSG_PEEK) == 0) {
@@ -1132,7 +1151,7 @@
 	}
 	if (orig_resid == uio->uio_resid && orig_resid &&
 	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0)
-		goto restart;
+		goto restart;		/* XXX multi-counts msgs */
 
 	if (flagsp)
 		*flagsp |= flags;
@@ -1140,6 +1159,20 @@
 	sbunlock(&so->so_rcv);
 out:
 	SOCKBUF_UNLOCK(&so->so_rcv);
+	if (cm != NULL) {
+		/*
+		 * Deal with control data now that we've dopped the
+		 * sockbuf lock.  This is important as otherwise, for
+		 * unix domain sockets, we create a LOR between so_rcv
+		 * and the file descriptor lock. Note we assume the
+		 * externalize method handles a list of mbufs.
+		 */
+		if (error == 0 && pr->pr_domain->dom_externalize) {
+			/* XXX ignore error? */
+			error = (*pr->pr_domain->dom_externalize)(cm, controlp);
+		} else
+			m_freem(cm);
+	}
 	return (error);
 }
 


More information about the p4-projects mailing list