svn commit: r257659 - in projects/mbuf_iovec/sys: kern netinet sys

Adrian Chadd adrian at FreeBSD.org
Mon Nov 4 22:12:27 UTC 2013


Author: adrian
Date: Mon Nov  4 22:12:25 2013
New Revision: 257659
URL: http://svnweb.freebsd.org/changeset/base/257659

Log:
  Start fleshing out some experimental hacks to start tidying up the
  mbuf access in preparation for mbuf iovec support (multiple buffers
  per mbuf.)
  
  There are a bunch of fundamental problems:
  
  * There's too much direct access of m_data, m_len, m_pktlen and even
    the underlying buffer (m_pktdat and friends.)  If we're going down
    the path of turning mbufs into iovecs, these need to be tidied up.
  
  * The direct method access of m_data / m_len / etc hides away
    the intent of the action from the actual action itself.
    For example, there's direct gymnastics of m_data to do headroom
    reservation.  There should be an mbuf method that reserves
    an amount of headroom in the mbuf.  Same with allocating tailroom.
    There are also plenty of cases where stuff will "skip" a header
    temporarily by modify m_data / m_len to skip _over_ the header,
    then adjust them back.
  
  * The direct gynmastics with m_len vs m_pktlen is also fraught
    with danger.
  
  * There's lots of hand written mbuf iterators over lists of mbufs
    that represent frames (chained with m_nextpkt) and lists of mbufs
    inside a given frame (chained with m_next.)  This is again
    annoying to diagnose, debug and modify.
  
  So, to make iovec stuff possible:
  
  * The data versus state/flags bits need to be separated out.
    That way the data storage stuff can be optionally turned into
    an array.
  
  * All the direct access of m_data / m_len / m_pkgdat / etc needs to
    go away.  They are now per-buffer versus per-mbuf things.
    Right now that's the same thing, but that does need to change.
  
  * An iterator for mbufs needs to be written and sprinkled around
    the codebase.  Specifically for this particular effort,
    m_next needs to be taken out and shot, replaced with an iterator
    that will iterate over mbufs and then either bump the mbuf data
    index or follow m->m_next to the next mbuf in the chain.
  
  This is still all early days.  I'm going through the exercise
  of converting things to methods and killing direct m->m_data access
  (which should be mtod() in almost all instances) as part of a general
  tidyup that will be good regardless of whether this work goes anywhere
  or not.
  
  Sponsored by:	Netflix, Inc.

Modified:
  projects/mbuf_iovec/sys/kern/uipc_mbuf.c
  projects/mbuf_iovec/sys/netinet/ip_icmp.c
  projects/mbuf_iovec/sys/netinet/ip_input.c
  projects/mbuf_iovec/sys/netinet/ip_output.c
  projects/mbuf_iovec/sys/netinet/tcp_output.c
  projects/mbuf_iovec/sys/netinet/tcp_subr.c
  projects/mbuf_iovec/sys/netinet/tcp_syncache.c
  projects/mbuf_iovec/sys/sys/mbuf.h

Modified: projects/mbuf_iovec/sys/kern/uipc_mbuf.c
==============================================================================
--- projects/mbuf_iovec/sys/kern/uipc_mbuf.c	Mon Nov  4 21:54:56 2013	(r257658)
+++ projects/mbuf_iovec/sys/kern/uipc_mbuf.c	Mon Nov  4 22:12:25 2013	(r257659)
@@ -2180,3 +2180,59 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufpro
 	    NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics");
 #endif
 
+int
+m_adj_data_head_rel(struct mbuf *m, int adj)
+{
+
+	m->m_data += adj;
+	m->m_len -= adj;
+	return (0);
+}
+
+int
+m_adj_data_head_abs(struct mbuf *m, unsigned int val)
+{
+
+	panic("%s: not yet implemented!\n", __func__);
+	return (0);
+}
+
+int
+m_adj_pktlen_head_rel(struct mbuf *m, int adj)
+{
+
+	m->m_pkthdr.len += adj;
+	return (0);
+}
+
+int
+m_adj_pktlen_head_abs(struct mbuf *m, unsigned int val)
+{
+
+	m->m_pkthdr.len = val;
+	return (0);
+}
+
+int
+m_reserv_data_head(struct mbuf *m, unsigned int adj)
+{
+
+	m->m_data += adj;
+	return (0);
+}
+
+int
+m_len_set_abs(struct mbuf *m, unsigned int len)
+{
+
+	m->m_len = len;
+	return (0);
+}
+
+int
+m_len_set_rel(struct mbuf *m, int adj)
+{
+
+	m->m_len += adj;
+	return (0);
+}

Modified: projects/mbuf_iovec/sys/netinet/ip_icmp.c
==============================================================================
--- projects/mbuf_iovec/sys/netinet/ip_icmp.c	Mon Nov  4 21:54:56 2013	(r257658)
+++ projects/mbuf_iovec/sys/netinet/ip_icmp.c	Mon Nov  4 22:12:25 2013	(r257659)
@@ -337,9 +337,8 @@ stdreply:	icmpelen = max(8, min(V_icmp_q
 	 * reply should bypass as well.
 	 */
 	m->m_flags |= n->m_flags & M_SKIP_FIREWALL;
-	m->m_data -= sizeof(struct ip);
-	m->m_len += sizeof(struct ip);
-	m->m_pkthdr.len = m->m_len;
+	m_adj_data_head_rel(m, - ((int) sizeof(struct ip)));
+	m_adj_pktlen_head_abs(m, m_get_len(m));
 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
 	nip = mtod(m, struct ip *);
 	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
@@ -392,15 +391,13 @@ icmp_input(struct mbuf *m, int off)
 		return;
 	}
 	ip = mtod(m, struct ip *);
-	m->m_len -= hlen;
-	m->m_data += hlen;
+	m_adj_data_head_rel(m, hlen);
 	icp = mtod(m, struct icmp *);
 	if (in_cksum(m, icmplen)) {
 		ICMPSTAT_INC(icps_checksum);
 		goto freeit;
 	}
-	m->m_len += hlen;
-	m->m_data -= hlen;
+	m_adj_data_head_rel(m, -hlen);
 
 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
 		/*
@@ -885,13 +882,11 @@ icmp_send(struct mbuf *m, struct mbuf *o
 	register struct icmp *icp;
 
 	hlen = ip->ip_hl << 2;
-	m->m_data += hlen;
-	m->m_len -= hlen;
+	m_adj_data_head_rel(m, hlen);
 	icp = mtod(m, struct icmp *);
 	icp->icmp_cksum = 0;
 	icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
-	m->m_data -= hlen;
-	m->m_len += hlen;
+	m_adj_data_head_rel(m, -hlen);
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 #ifdef ICMPPRINTFS
 	if (icmpprintfs) {

Modified: projects/mbuf_iovec/sys/netinet/ip_input.c
==============================================================================
--- projects/mbuf_iovec/sys/netinet/ip_input.c	Mon Nov  4 21:54:56 2013	(r257658)
+++ projects/mbuf_iovec/sys/netinet/ip_input.c	Mon Nov  4 22:12:25 2013	(r257659)
@@ -934,8 +934,7 @@ found:
 	 * Presence of header sizes in mbufs
 	 * would confuse code below.
 	 */
-	m->m_data += hlen;
-	m->m_len -= hlen;
+	m_adj_data_head_rel(m, hlen);
 
 	/*
 	 * If first fragment to arrive, create a reassembly queue.
@@ -1125,8 +1124,7 @@ found:
 	TAILQ_REMOVE(head, fp, ipq_list);
 	V_nipq--;
 	uma_zfree(V_ipq_zone, fp);
-	m->m_len += (ip->ip_hl << 2);
-	m->m_data -= (ip->ip_hl << 2);
+	m_adj_data_head_rel(m, -(ip->ip_hl << 2));
 	/* some debugging cruft by sklower, below, will go away soon */
 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
 		m_fixhdr(m);

Modified: projects/mbuf_iovec/sys/netinet/ip_output.c
==============================================================================
--- projects/mbuf_iovec/sys/netinet/ip_output.c	Mon Nov  4 21:54:56 2013	(r257658)
+++ projects/mbuf_iovec/sys/netinet/ip_output.c	Mon Nov  4 22:12:25 2013	(r257659)
@@ -806,7 +806,7 @@ smart_frag_failure:
 			mhip->ip_v = IPVERSION;
 			mhip->ip_hl = mhlen >> 2;
 		}
-		m->m_len = mhlen;
+		m_len_set_abs(m, mhlen);
 		/* XXX do we need to add ip_off below ? */
 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
 		if (off + len >= ip_len)

Modified: projects/mbuf_iovec/sys/netinet/tcp_output.c
==============================================================================
--- projects/mbuf_iovec/sys/netinet/tcp_output.c	Mon Nov  4 21:54:56 2013	(r257658)
+++ projects/mbuf_iovec/sys/netinet/tcp_output.c	Mon Nov  4 22:12:25 2013	(r257659)
@@ -860,8 +860,8 @@ send:
 			goto out;
 		}
 
-		m->m_data += max_linkhdr;
-		m->m_len = hdrlen;
+		m_reserv_data_head(m, max_linkhdr);
+		m_len_set_abs(m, hdrlen);
 
 		/*
 		 * Start the m_copy functions from the closest mbuf
@@ -916,8 +916,8 @@ send:
 			MH_ALIGN(m, hdrlen);
 		} else
 #endif
-		m->m_data += max_linkhdr;
-		m->m_len = hdrlen;
+		m_reserv_data_head(m, max_linkhdr);
+		m_len_set_abs(m, hdrlen);
 	}
 	SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
@@ -1080,7 +1080,7 @@ send:
 	 * Put TCP length in extended header, and then
 	 * checksum extended header and data.
 	 */
-	m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
+	m_adj_pktlen_head_abs(m, hdrlen + len); /* in6_cksum() need this */
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 #ifdef INET6
 	if (isipv6) {

Modified: projects/mbuf_iovec/sys/netinet/tcp_subr.c
==============================================================================
--- projects/mbuf_iovec/sys/netinet/tcp_subr.c	Mon Nov  4 21:54:56 2013	(r257658)
+++ projects/mbuf_iovec/sys/netinet/tcp_subr.c	Mon Nov  4 22:12:25 2013	(r257659)
@@ -582,7 +582,7 @@ tcp_respond(struct tcpcb *tp, void *ipge
 		if (m == NULL)
 			return;
 		tlen = 0;
-		m->m_data += max_linkhdr;
+		m_reserv_data_head(m, max_linkhdr);
 #ifdef INET6
 		if (isipv6) {
 			bcopy((caddr_t)ip6, mtod(m, caddr_t),

Modified: projects/mbuf_iovec/sys/netinet/tcp_syncache.c
==============================================================================
--- projects/mbuf_iovec/sys/netinet/tcp_syncache.c	Mon Nov  4 21:54:56 2013	(r257658)
+++ projects/mbuf_iovec/sys/netinet/tcp_syncache.c	Mon Nov  4 22:12:25 2013	(r257659)
@@ -1424,9 +1424,9 @@ syncache_respond(struct syncache *sc)
 #ifdef MAC
 	mac_syncache_create_mbuf(sc->sc_label, m);
 #endif
-	m->m_data += max_linkhdr;
-	m->m_len = tlen;
-	m->m_pkthdr.len = tlen;
+	m_reserv_data_head(m, max_linkhdr);
+	m_len_set_abs(m, tlen);
+	m_adj_pktlen_head_abs(m, tlen);
 	m->m_pkthdr.rcvif = NULL;
 
 #ifdef INET6

Modified: projects/mbuf_iovec/sys/sys/mbuf.h
==============================================================================
--- projects/mbuf_iovec/sys/sys/mbuf.h	Mon Nov  4 21:54:56 2013	(r257658)
+++ projects/mbuf_iovec/sys/sys/mbuf.h	Mon Nov  4 22:12:25 2013	(r257659)
@@ -1171,5 +1171,51 @@ rt_m_getfib(struct mbuf *m)
  #define M_PROFILE(m)
 #endif
 
+/*
+ * Bump the leading part of m_data by the given amount.
+ * This is relative to what m_data is currently set to.
+ * The value can be positive or negative.
+ *
+ * This will update m_data and m_len, but not m_pktlen.
+ */
+extern int m_adj_data_head_rel(struct mbuf *, int);
+extern int m_adj_data_head_abs(struct mbuf *, unsigned int);
+
+/*
+ * Bump the trailer part of m_len by the given amount.
+ * This is relative to what m_len is currently set to.
+ * The value can be positive or negative.
+ *
+ * This will update m_len but not m_pktlen.
+ */
+extern int m_adj_pktlen_head_rel(struct mbuf *, int);
+extern int m_adj_pktlen_head_abs(struct mbuf *, unsigned int);
+
+/*
+ * Get the m_len field.
+ */
+static inline int m_get_len(struct mbuf *m)
+{
+	return (m->m_len);
+
+}
+
+/*
+ * Reserve the given amount of data in the front of the mbuf.
+ * This doesn't update m_len, because reserving header data
+ * doesn't necessarily imply changing the length of the
+ * payload.
+ */
+extern int m_reserv_data_head(struct mbuf *, unsigned int);
+
+/*
+ * Set the mbuf length to the given value.
+ */
+extern int m_len_set_abs(struct mbuf *, unsigned int);
+
+/*
+ * Adjust the mbuf length by the given value.
+ */
+extern int m_len_set_rel(struct mbuf *, int);
 
 #endif /* !_SYS_MBUF_H_ */


More information about the svn-src-projects mailing list