PERFORCE change 44208 for review
Sam Leffler
sam at FreeBSD.org
Mon Dec 22 16:45:13 PST 2003
http://perforce.freebsd.org/chv.cgi?CH=44208
Change 44208 by sam at sam_ebb on 2003/12/22 16:44:11
Redo soreceive's handling of MT_CONTROL mbufs to eliminate a LOR
between so_rcv and the file descriptor lock. Instead of calling
the externalize method as each MT_CONTROL mbuf is identified, hold
them until after the sockbuf lock has been dropped and then process
them. This potentially changes the semantics of soreceive when
errors occur; must monitor the effect of this change carefully.
While here fix some bugs and cleanup some code:
o handle sodupsockaddr failing; return ENOMEM if it happens
o handle m_copy (aka m_copym) failing; return ENOBUFS if it happens
o optimize the handling of controlp based on the knowledged that
MT_CONTROL mbufs come one at a time
o simplify MT_CONTROL processing by using if { do { } while } and
hoisting code out of the loop
o add a KASSERT to validate an assumption about error that's used
to optimize some subsequent code
o note that the goto restart case at the bottom of soreceive
causes msg recv statistics to be wrong
Open issue: it's unclear what to do if the externalize returns an
error after retrieving data from the socket. This is likely to
confuse applications.
Affected files ...
.. //depot/projects/netperf+sockets/sys/kern/uipc_socket.c#8 edit
Differences ...
==== //depot/projects/netperf+sockets/sys/kern/uipc_socket.c#8 (text+ko) ====
@@ -775,14 +775,19 @@
int flags, len, error, offset;
struct protosw *pr = so->so_proto;
struct mbuf *nextrecord;
+ struct mbuf *cm, **cme;
int moff, type = 0;
int orig_resid = uio->uio_resid;
mp = mp0;
if (psa)
*psa = 0;
- if (controlp)
+ cm = NULL;
+ if (controlp) {
*controlp = 0;
+ cme = &cm;
+ } else
+ cme = &cm; /* XXX to silence gcc */
if (flagsp)
flags = *flagsp &~ MSG_EOR;
else
@@ -893,6 +898,7 @@
goto restart;
}
dontblock:
+ KASSERT(error == 0, ("unexpected state, error %u", error));
if (uio->uio_td)
uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
SBLASTRECORDCHK(&so->so_rcv);
@@ -901,10 +907,14 @@
if (pr->pr_flags & PR_ADDR) {
KASSERT(m->m_type == MT_SONAME,
("m->m_type == %d", m->m_type));
- orig_resid = 0;
- if (psa)
+ if (psa) {
*psa = sodupsockaddr(mtod(m, struct sockaddr *),
M_NOWAIT); /* XXX */
+ if (*psa == NULL) {
+ error = ENOMEM;
+ goto release;
+ }
+ }
if (flags & MSG_PEEK) {
m = m->m_next;
} else {
@@ -912,30 +922,39 @@
so->so_rcv.sb_mb = m_free(m);
m = so->so_rcv.sb_mb;
}
+ orig_resid = 0;
}
- while (m && m->m_type == MT_CONTROL && error == 0) {
- if (flags & MSG_PEEK) {
- if (controlp)
- *controlp = m_copy(m, 0, m->m_len);
- m = m->m_next;
- } else {
- sbfree(&so->so_rcv, m);
- so->so_rcv.sb_mb = m->m_next;
- m->m_next = NULL;
- if (pr->pr_domain->dom_externalize)
- error =
- (*pr->pr_domain->dom_externalize)(m, controlp);
- else if (controlp)
- *controlp = m;
- else
- m_freem(m);
- m = so->so_rcv.sb_mb;
- }
- if (controlp) {
- orig_resid = 0;
- while (*controlp != NULL)
- controlp = &(*controlp)->m_next;
- }
+ if (m && m->m_type == MT_CONTROL) {
+ do {
+ if (flags & MSG_PEEK) {
+ if (controlp) {
+ *controlp = m_copy(m, 0, m->m_len);
+ if (*controlp == NULL) {
+ error = ENOBUFS;
+ goto release;
+ }
+ controlp = &(*controlp)->m_next;
+ }
+ m = m->m_next;
+ } else {
+ sbfree(&so->so_rcv, m);
+ so->so_rcv.sb_mb = m->m_next;
+ m->m_next = NULL;
+ if (controlp) {
+ /*
+ * Link mbufs together for processing
+ * below. See the comments there for
+ * an explanation of why we delay the
+ * work.
+ */
+ *cme = m;
+ cme = &(*cme)->m_next;
+ } else
+ m_free(m);
+ m = so->so_rcv.sb_mb;
+ }
+ } while (m && m->m_type == MT_CONTROL);
+ orig_resid = 0;
}
if (m) {
if ((flags & MSG_PEEK) == 0) {
@@ -1132,7 +1151,7 @@
}
if (orig_resid == uio->uio_resid && orig_resid &&
(flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0)
- goto restart;
+ goto restart; /* XXX multi-counts msgs */
if (flagsp)
*flagsp |= flags;
@@ -1140,6 +1159,20 @@
sbunlock(&so->so_rcv);
out:
SOCKBUF_UNLOCK(&so->so_rcv);
+ if (cm != NULL) {
+ /*
+ * Deal with control data now that we've dopped the
+ * sockbuf lock. This is important as otherwise, for
+ * unix domain sockets, we create a LOR between so_rcv
+ * and the file descriptor lock. Note we assume the
+ * externalize method handles a list of mbufs.
+ */
+ if (error == 0 && pr->pr_domain->dom_externalize) {
+ /* XXX ignore error? */
+ error = (*pr->pr_domain->dom_externalize)(cm, controlp);
+ } else
+ m_freem(cm);
+ }
return (error);
}
More information about the p4-projects
mailing list