svn commit: r188121 - in projects/jeff_mbuf/sys: dev/cxgb dev/ti dev/vx dev/xen/netback kern net netgraph netinet nfsclient nfsserver sys vm

Jeff Roberson jeff at FreeBSD.org
Wed Feb 4 11:43:09 PST 2009


Author: jeff
Date: Wed Feb  4 19:43:08 2009
New Revision: 188121
URL: http://svn.freebsd.org/changeset/base/188121

Log:
  Initial import of ongoing mbuf work
   - Introduce a m_size field to simplify a great number of macros and
     consumer code.  Paves the way for more flexible mbuf sizes once
     the field is honored in all code.
   - Streamline mbuf allocation/free code.  Move things into functions
     that should not be inlined and functions to inlines when the
     calling footprint exceeds the function text size.
   - Reference count mbufs rather than clusters.  All mbufs have a ref
     so uma refcnt zones and the super expensive uma_find_refcnt() is
     not necessary.  Don't duplicate m_ext areas; call m_refm() to
     reference a specific data area of a target mbuf.
   - Retire UMA refcnt zones.  These were too inefficient to be useful.

Modified:
  projects/jeff_mbuf/sys/dev/cxgb/cxgb_sge.c
  projects/jeff_mbuf/sys/dev/ti/if_ti.c
  projects/jeff_mbuf/sys/dev/vx/if_vx.c
  projects/jeff_mbuf/sys/dev/xen/netback/netback.c
  projects/jeff_mbuf/sys/kern/kern_mbuf.c
  projects/jeff_mbuf/sys/kern/uipc_mbuf.c
  projects/jeff_mbuf/sys/kern/uipc_mbuf2.c
  projects/jeff_mbuf/sys/kern/uipc_sockbuf.c
  projects/jeff_mbuf/sys/net/if_gre.c
  projects/jeff_mbuf/sys/net/if_ppp.c
  projects/jeff_mbuf/sys/net/ppp_tty.c
  projects/jeff_mbuf/sys/netgraph/ng_tty.c
  projects/jeff_mbuf/sys/netinet/ip_options.c
  projects/jeff_mbuf/sys/netinet/sctp_os_bsd.h
  projects/jeff_mbuf/sys/nfsclient/nfsm_subs.h
  projects/jeff_mbuf/sys/nfsserver/nfsm_subs.h
  projects/jeff_mbuf/sys/sys/mbuf.h
  projects/jeff_mbuf/sys/sys/sockbuf.h
  projects/jeff_mbuf/sys/vm/uma.h
  projects/jeff_mbuf/sys/vm/uma_core.c
  projects/jeff_mbuf/sys/vm/uma_dbg.c
  projects/jeff_mbuf/sys/vm/uma_int.h

Modified: projects/jeff_mbuf/sys/dev/cxgb/cxgb_sge.c
==============================================================================
--- projects/jeff_mbuf/sys/dev/cxgb/cxgb_sge.c	Wed Feb  4 19:30:02 2009	(r188120)
+++ projects/jeff_mbuf/sys/dev/cxgb/cxgb_sge.c	Wed Feb  4 19:43:08 2009	(r188121)
@@ -515,7 +515,7 @@ refill_fl(adapter_t *sc, struct sge_fl *
 	struct refill_fl_cb_arg cb_arg;
 	caddr_t cl;
 	int err, count = 0;
-	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
+	int header_size = sizeof(struct mbuf) + sizeof(uint32_t);
 	
 	cb_arg.error = 0;
 	while (n--) {
@@ -2360,7 +2360,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int i
 	q->rspq.size = p->rspq_size;
 
 
-	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
+	header_size = sizeof(struct mbuf) + sizeof(uint32_t);
 	q->txq[TXQ_ETH].stop_thres = nports *
 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 
@@ -2535,8 +2535,7 @@ init_cluster_mbuf(caddr_t cl, int flags,
 	struct mbuf *m;
 	int header_size;
 	
-	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
-	    sizeof(struct m_ext_) + sizeof(uint32_t);
+	header_size = sizeof(struct mbuf) + sizeof(uint32_t);
 	
 	bzero(cl, header_size);
 	m = (struct mbuf *)cl;

Modified: projects/jeff_mbuf/sys/dev/ti/if_ti.c
==============================================================================
--- projects/jeff_mbuf/sys/dev/ti/if_ti.c	Wed Feb  4 19:30:02 2009	(r188120)
+++ projects/jeff_mbuf/sys/dev/ti/if_ti.c	Wed Feb  4 19:43:08 2009	(r188121)
@@ -1308,12 +1308,11 @@ ti_newbuf_mini(sc, i, m)
 		if (m_new == NULL) {
 			return (ENOBUFS);
 		}
-		m_new->m_len = m_new->m_pkthdr.len = MHLEN;
 	} else {
 		m_new = m;
-		m_new->m_data = m_new->m_pktdat;
-		m_new->m_len = m_new->m_pkthdr.len = MHLEN;
+		m_new->m_data = M_START(m);
 	}
+	m_new->m_len = m_new->m_pkthdr.len = m_new->m_size;
 
 	m_adj(m_new, ETHER_ALIGN);
 	r = &sc->ti_rdata->ti_rx_mini_ring[i];

Modified: projects/jeff_mbuf/sys/dev/vx/if_vx.c
==============================================================================
--- projects/jeff_mbuf/sys/dev/vx/if_vx.c	Wed Feb  4 19:30:02 2009	(r188120)
+++ projects/jeff_mbuf/sys/dev/vx/if_vx.c	Wed Feb  4 19:43:08 2009	(r188121)
@@ -825,8 +825,11 @@ vx_get(struct vx_softc *sc, u_int totlen
 		/* Convert one of our saved mbuf's. */
 		sc->vx_next_mb = (sc->vx_next_mb + 1) % MAX_MBS;
 		m->m_data = m->m_pktdat;
-		m->m_flags = M_PKTHDR;
-		bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
+		m->m_flags |= M_PKTHDR;
+		if (m_pkthdr_init(m, M_NOWAIT)) {
+			m_free(m);
+			return NULL;
+		}
 	}
 	m->m_pkthdr.rcvif = ifp;
 	m->m_pkthdr.len = totlen;

Modified: projects/jeff_mbuf/sys/dev/xen/netback/netback.c
==============================================================================
--- projects/jeff_mbuf/sys/dev/xen/netback/netback.c	Wed Feb  4 19:30:02 2009	(r188120)
+++ projects/jeff_mbuf/sys/dev/xen/netback/netback.c	Wed Feb  4 19:43:08 2009	(r188121)
@@ -873,8 +873,7 @@ netif_rx(netif_t *netif)
 		pkts_dequeued++;
 
 		/* Check if we need to copy the data */
-		if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
-			(*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
+		if (M_WRITABLE(m) == 0 || m->m_next != NULL) {
 			struct mbuf *n;
 				
 			DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",

Modified: projects/jeff_mbuf/sys/kern/kern_mbuf.c
==============================================================================
--- projects/jeff_mbuf/sys/kern/kern_mbuf.c	Wed Feb  4 19:30:02 2009	(r188120)
+++ projects/jeff_mbuf/sys/kern/kern_mbuf.c	Wed Feb  4 19:43:08 2009	(r188121)
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
+#include <sys/ktr.h>
 #include <sys/protosw.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
@@ -210,27 +211,24 @@ SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbsta
 /*
  * Zones from which we allocate.
  */
-uma_zone_t	zone_mbuf;
 uma_zone_t	zone_clust;
-uma_zone_t	zone_pack;
 uma_zone_t	zone_jumbop;
 uma_zone_t	zone_jumbo9;
 uma_zone_t	zone_jumbo16;
-uma_zone_t	zone_ext_refcnt;
+uma_zone_t	zone_mbuf;
+uma_zone_t	zone_pack;
 
 /*
  * Local prototypes.
  */
-static int	mb_ctor_mbuf(void *, int, void *, int);
-static int	mb_ctor_clust(void *, int, void *, int);
-static int	mb_ctor_pack(void *, int, void *, int);
-static void	mb_dtor_mbuf(void *, int, void *);
-static void	mb_dtor_clust(void *, int, void *);
-static void	mb_dtor_pack(void *, int, void *);
-static int	mb_zinit_pack(void *, int, int);
-static void	mb_zfini_pack(void *, int);
-
+#ifdef INVARIANTS 
+static int	mb_ctor_pack(void *mem, int size, void *arg, int how);
+#endif
+static void	mb_dtor_pack(void *mem, int size, void *arg);
 static void	mb_reclaim(void *);
+static int	mb_zinit_pack(void *mem, int size, int how);
+static void	mb_zfini_pack(void *mem, int size);
+
 static void	mbuf_init(void *);
 static void    *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int);
 static void	mbuf_jumbo_free(void *, int, u_int8_t);
@@ -252,73 +250,66 @@ mbuf_init(void *dummy)
 	 * Configure UMA zones for Mbufs, Clusters, and Packets.
 	 */
 	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
-	    mb_ctor_mbuf, mb_dtor_mbuf,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
 	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
 
 	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
-	    mb_ctor_clust, mb_dtor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET);
 	if (nmbclusters > 0)
 		uma_zone_set_max(zone_clust, nmbclusters);
 
-	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
+	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME,
+#ifdef INVARIANTS
+	    mb_ctor_pack,
+#else
+	    NULL,
+#endif
 	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
 
 	/* Make jumbo frame zone too. Page size, 9k and 16k. */
 	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
-	    mb_ctor_clust, mb_dtor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	if (nmbjumbop > 0)
 		uma_zone_set_max(zone_jumbop, nmbjumbop);
 
 	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
-	    mb_ctor_clust, mb_dtor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	if (nmbjumbo9 > 0)
 		uma_zone_set_max(zone_jumbo9, nmbjumbo9);
 	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
 	uma_zone_set_freef(zone_jumbo9, mbuf_jumbo_free);
 
 	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
-	    mb_ctor_clust, mb_dtor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	if (nmbjumbo16 > 0)
 		uma_zone_set_max(zone_jumbo16, nmbjumbo16);
 	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
 	uma_zone_set_freef(zone_jumbo16, mbuf_jumbo_free);
 
-	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
-	    NULL, NULL,
-	    NULL, NULL,
-	    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
-
-	/* uma_prealloc() goes here... */
-
 	/*
 	 * Hook event handler for low-memory situation, used to
 	 * drain protocols and push data back to the caches (UMA
@@ -373,86 +364,18 @@ mbuf_jumbo_free(void *mem, int size, u_i
 	contigfree(mem, size, M_JUMBOFRAME);
 }
 
-/*
- * Constructor for Mbuf master zone.
- *
- * The 'arg' pointer points to a mb_args structure which
- * contains call-specific information required to support the
- * mbuf allocation API.  See mbuf.h.
- */
+#ifdef INVARIANTS
 static int
-mb_ctor_mbuf(void *mem, int size, void *arg, int how)
+mb_ctor_pack(void *mem, int size, void *arg, int how)
 {
 	struct mbuf *m;
-	struct mb_args *args;
-#ifdef MAC
-	int error;
-#endif
-	int flags;
-	short type;
 
-#ifdef INVARIANTS
-	trash_ctor(mem, size, arg, how);
-#endif
 	m = (struct mbuf *)mem;
-	args = (struct mb_args *)arg;
-	flags = args->flags;
-	type = args->type;
-
-	/*
-	 * The mbuf is initialized later.  The caller has the
-	 * responsibility to set up any MAC labels too.
-	 */
-	if (type == MT_NOINIT)
-		return (0);
+	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
 
-	m->m_next = NULL;
-	m->m_nextpkt = NULL;
-	m->m_len = 0;
-	m->m_flags = flags;
-	m->m_type = type;
-	if (flags & M_PKTHDR) {
-		m->m_data = m->m_pktdat;
-		m->m_pkthdr.rcvif = NULL;
-		m->m_pkthdr.header = NULL;
-		m->m_pkthdr.len = 0;
-		m->m_pkthdr.csum_flags = 0;
-		m->m_pkthdr.csum_data = 0;
-		m->m_pkthdr.tso_segsz = 0;
-		m->m_pkthdr.ether_vtag = 0;
-		m->m_pkthdr.flowid = 0;
-		SLIST_INIT(&m->m_pkthdr.tags);
-#ifdef MAC
-		/* If the label init fails, fail the alloc */
-		error = mac_mbuf_init(m, how);
-		if (error)
-			return (error);
-#endif
-	} else
-		m->m_data = m->m_dat;
 	return (0);
 }
-
-/*
- * The Mbuf master zone destructor.
- */
-static void
-mb_dtor_mbuf(void *mem, int size, void *arg)
-{
-	struct mbuf *m;
-	unsigned long flags; 
-
-	m = (struct mbuf *)mem;
-	flags = (unsigned long)arg;
-
-	if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
-		m_tag_delete_chain(m, NULL);
-	KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
-	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
-#ifdef INVARIANTS
-	trash_dtor(mem, size, arg);
 #endif
-}
 
 /*
  * The Mbuf Packet zone destructor.
@@ -463,21 +386,21 @@ mb_dtor_pack(void *mem, int size, void *
 	struct mbuf *m;
 
 	m = (struct mbuf *)mem;
-	if ((m->m_flags & M_PKTHDR) != 0)
-		m_tag_delete_chain(m, NULL);
-
 	/* Make sure we've got a clean cluster back. */
 	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
 	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
-	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
+	KASSERT(m->m_ext.ext_free == m_ext_free_nop,
+	    ("%s: ext_free != m_ext_free_nop", __func__));
 	KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
 	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
-	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
-	KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
-	KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
-#ifdef INVARIANTS
+	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES",
+	    __func__));
+	KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET",
+	    __func__));
+#ifdef INVARIANTS 
 	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
 #endif
+
 	/*
 	 * If there are processes blocked on zone_clust, waiting for pages
 	 * to be freed up, * cause them to be woken up by draining the
@@ -491,85 +414,6 @@ mb_dtor_pack(void *mem, int size, void *
 }
 
 /*
- * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
- *
- * Here the 'arg' pointer points to the Mbuf which we
- * are configuring cluster storage for.  If 'arg' is
- * empty we allocate just the cluster without setting
- * the mbuf to it.  See mbuf.h.
- */
-static int
-mb_ctor_clust(void *mem, int size, void *arg, int how)
-{
-	struct mbuf *m;
-	u_int *refcnt;
-	int type;
-	uma_zone_t zone;
-
-#ifdef INVARIANTS
-	trash_ctor(mem, size, arg, how);
-#endif
-	switch (size) {
-	case MCLBYTES:
-		type = EXT_CLUSTER;
-		zone = zone_clust;
-		break;
-#if MJUMPAGESIZE != MCLBYTES
-	case MJUMPAGESIZE:
-		type = EXT_JUMBOP;
-		zone = zone_jumbop;
-		break;
-#endif
-	case MJUM9BYTES:
-		type = EXT_JUMBO9;
-		zone = zone_jumbo9;
-		break;
-	case MJUM16BYTES:
-		type = EXT_JUMBO16;
-		zone = zone_jumbo16;
-		break;
-	default:
-		panic("unknown cluster size");
-		break;
-	}
-
-	m = (struct mbuf *)arg;
-	refcnt = uma_find_refcnt(zone, mem);
-	*refcnt = 1;
-	if (m != NULL) {
-		m->m_ext.ext_buf = (caddr_t)mem;
-		m->m_data = m->m_ext.ext_buf;
-		m->m_flags |= M_EXT;
-		m->m_ext.ext_free = NULL;
-		m->m_ext.ext_arg1 = NULL;
-		m->m_ext.ext_arg2 = NULL;
-		m->m_ext.ext_size = size;
-		m->m_ext.ext_type = type;
-		m->m_ext.ref_cnt = refcnt;
-	}
-
-	return (0);
-}
-
-/*
- * The Mbuf Cluster zone destructor.
- */
-static void
-mb_dtor_clust(void *mem, int size, void *arg)
-{
-#ifdef INVARIANTS
-	uma_zone_t zone;
-
-	zone = m_getzone(size);
-	KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
-		("%s: refcnt incorrect %u", __func__,
-		 *(uma_find_refcnt(zone, mem))) );
-
-	trash_dtor(mem, size, arg);
-#endif
-}
-
-/*
  * The Packet secondary zone's init routine, executed on the
  * object's transition from mbuf keg slab to zone cache.
  */
@@ -579,13 +423,16 @@ mb_zinit_pack(void *mem, int size, int h
 	struct mbuf *m;
 
 	m = (struct mbuf *)mem;		/* m is virgin. */
-	if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
-	    m->m_ext.ext_buf == NULL)
+	/*
+	 * Allocate and attach the cluster to the ext.
+	 */
+	if ((mem = uma_zalloc(zone_clust, how)) == NULL)
 		return (ENOMEM);
-	m->m_ext.ext_type = EXT_PACKET;	/* Override. */
+	m_extadd(m, mem, MCLBYTES, m_ext_free_nop, NULL, NULL, 0, EXT_PACKET);
 #ifdef INVARIANTS
-	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
+	return trash_init(m->m_ext.ext_buf, MCLBYTES, how);
 #endif
+
 	return (0);
 }
 
@@ -608,57 +455,34 @@ mb_zfini_pack(void *mem, int size)
 #endif
 }
 
-/*
- * The "packet" keg constructor.
- */
-static int
-mb_ctor_pack(void *mem, int size, void *arg, int how)
+int
+m_pkthdr_init(struct mbuf *m, int how)
 {
-	struct mbuf *m;
-	struct mb_args *args;
 #ifdef MAC
 	int error;
 #endif
-	int flags;
-	short type;
-
-	m = (struct mbuf *)mem;
-	args = (struct mb_args *)arg;
-	flags = args->flags;
-	type = args->type;
 
-#ifdef INVARIANTS
-	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
-#endif
-	m->m_next = NULL;
-	m->m_nextpkt = NULL;
-	m->m_data = m->m_ext.ext_buf;
-	m->m_len = 0;
-	m->m_flags = (flags | M_EXT);
-	m->m_type = type;
-
-	if (flags & M_PKTHDR) {
-		m->m_pkthdr.rcvif = NULL;
-		m->m_pkthdr.len = 0;
-		m->m_pkthdr.header = NULL;
-		m->m_pkthdr.csum_flags = 0;
-		m->m_pkthdr.csum_data = 0;
-		m->m_pkthdr.tso_segsz = 0;
-		m->m_pkthdr.ether_vtag = 0;
-		m->m_pkthdr.flowid = 0;
-		SLIST_INIT(&m->m_pkthdr.tags);
+	m->m_data = m->m_pktdat;
+	SLIST_INIT(&m->m_pkthdr.tags);
+	m->m_pkthdr.rcvif = NULL;
+	m->m_pkthdr.header = NULL;
+	m->m_pkthdr.len = 0;
+	m->m_pkthdr.flowid = 0;
+	m->m_pkthdr.csum_flags = 0;
+	m->m_pkthdr.csum_data = 0;
+	m->m_pkthdr.tso_segsz = 0;
+	m->m_pkthdr.ether_vtag = 0;
 #ifdef MAC
-		/* If the label init fails, fail the alloc */
-		error = mac_mbuf_init(m, how);
-		if (error)
-			return (error);
+	/* If the label init fails, fail the alloc */
+	error = mac_mbuf_init(m, how);
+	if (error)
+		return (error);
 #endif
-	}
-	/* m_ext is already initialized. */
 
 	return (0);
 }
 
+
 /*
  * This is the protocol drain routine.
  *
@@ -680,3 +504,45 @@ mb_reclaim(void *junk)
 			if (pr->pr_drain != NULL)
 				(*pr->pr_drain)();
 }
+
+struct mbuf *
+_m_getjcl(int how, short type, int flags, int size, uma_zone_t zone,
+    int exttype)
+{
+	struct mbuf *m;
+	void *mem;
+
+	if (size == MCLBYTES)
+		return m_getcl(how, type, flags);
+	/*
+	 * Allocate the memory and header seperate for these sizes.
+	 */
+	mem = uma_zalloc(zone, how);
+	if (mem == NULL)
+		return (NULL);
+	m = m_alloc(zone_mbuf, 0, how, type, flags);
+	if (m == NULL) {
+		uma_zfree(zone, mem);
+		return (NULL);
+	}
+	m_extadd(m, mem, size, m_ext_free_zone, zone, mem, flags, exttype);
+
+	return (m);
+}
+
+void *
+_m_cljget(struct mbuf *m, int how, int size, uma_zone_t zone, int exttype)
+{
+	void *mem;
+
+	if (m && m->m_flags & M_EXT)
+		printf("%s: %p mbuf already has cluster\n", __func__, m);
+	if (m != NULL)
+		m->m_ext.ext_buf = NULL;
+	mem = uma_zalloc(zone, how);
+	if (mem == NULL)
+		return (NULL);
+	if (m)
+		m_extadd(m, mem, size, m_ext_free_zone, zone, mem, 0, exttype);
+	return (mem);
+}

Modified: projects/jeff_mbuf/sys/kern/uipc_mbuf.c
==============================================================================
--- projects/jeff_mbuf/sys/kern/uipc_mbuf.c	Wed Feb  4 19:30:02 2009	(r188120)
+++ projects/jeff_mbuf/sys/kern/uipc_mbuf.c	Wed Feb  4 19:43:08 2009	(r188121)
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
+#include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -87,6 +88,8 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, m_defrag
 	   &m_defragrandomfailures, 0, "");
 #endif
 
+static void m_refm(struct mbuf *mb, struct mbuf *m);
+
 /*
  * Allocate a given length worth of mbufs and/or clusters (whatever fits
  * best) and return a pointer to the top of the allocated chain.  If an
@@ -128,8 +131,7 @@ m_getm2(struct mbuf *m, int len, int how
 		}
 
 		/* Book keeping. */
-		len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size :
-			((mb->m_flags & M_PKTHDR) ? MHLEN : MLEN);
+		len -= mb->m_size;
 		if (mtail != NULL)
 			mtail->m_next = mb;
 		else
@@ -164,147 +166,63 @@ m_freem(struct mbuf *mb)
 		mb = m_free(mb);
 }
 
-/*-
- * Configure a provided mbuf to refer to the provided external storage
- * buffer and setup a reference count for said buffer.  If the setting
- * up of the reference count fails, the M_EXT bit will not be set.  If
- * successfull, the M_EXT bit is set in the mbuf's flags.
- *
- * Arguments:
- *    mb     The existing mbuf to which to attach the provided buffer.
- *    buf    The address of the provided external storage buffer.
- *    size   The size of the provided buffer.
- *    freef  A pointer to a routine that is responsible for freeing the
- *           provided external storage buffer.
- *    args   A pointer to an argument structure (of any type) to be passed
- *           to the provided freef routine (may be NULL).
- *    flags  Any other flags to be passed to the provided mbuf.
- *    type   The type that the external storage buffer should be
- *           labeled with.
- *
- * Returns:
- *    Nothing.
+/*
+ * Reference the existing storage area of an mbuf.  The reference is readonly
+ * and the referenced data can not be freed until the referencing mbuf is
+ * freed.
  */
-void
-m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
-    void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type)
+static void
+m_refm(struct mbuf *mb, struct mbuf *m)
 {
-	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
 
-	if (type != EXT_EXTREF)
-		mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT);
-	if (mb->m_ext.ref_cnt != NULL) {
-		*(mb->m_ext.ref_cnt) = 1;
-		mb->m_flags |= (M_EXT | flags);
-		mb->m_ext.ext_buf = buf;
-		mb->m_data = mb->m_ext.ext_buf;
-		mb->m_ext.ext_size = size;
-		mb->m_ext.ext_free = freef;
-		mb->m_ext.ext_arg1 = arg1;
-		mb->m_ext.ext_arg2 = arg2;
-		mb->m_ext.ext_type = type;
-        }
+	if (m->m_ref > 1)
+		atomic_add_int(&m->m_ref, 1);
+	else
+		m->m_ref++;
+	mb->m_flags |= M_EXT | M_RDONLY;
+	mb->m_data = m->m_data;
+	mb->m_size = m->m_len;	/* Only existing data is visible. */
+	mb->m_ext.ext_buf = m->m_data;
+	mb->m_ext.ext_size = m->m_len;
+	mb->m_ext.ext_free = m_ext_free_mbuf;
+	mb->m_ext.ext_arg1 = m;
+	mb->m_ext.ext_arg2 = NULL;
+	mb->m_ext.ext_type = EXT_MBUF;
+
+	CTR3(KTR_NET, "m_refm: %p ref %d buf %p",
+	    mb, mb->m_ref, mb->m_ext.ext_buf);
 }
 
 /*
- * Non-directly-exported function to clean up after mbufs with M_EXT
- * storage attached to them if the reference count hits 1.
+ * Free the ext area of a mbuf assuming a uma zone and argument are
+ * presented.
  */
 void
-mb_free_ext(struct mbuf *m)
+m_ext_free_zone(void *arg1, void *arg2)
 {
-	int skipmbuf;
-	
-	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
-	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
 
+	uma_zfree(arg1, arg2);
+}
+
+/*
+ * Free the ext area of a mbuf assuming it has been acquired with m_refm().
+ */
+void
+m_ext_free_mbuf(void *arg1, void *arg2)
+{
 
 	/*
-	 * check if the header is embedded in the cluster
-	 */     
-	skipmbuf = (m->m_flags & M_NOFREE);
-	
-	/* Free attached storage if this mbuf is the only reference to it. */
-	if (*(m->m_ext.ref_cnt) == 1 ||
-	    atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
-		switch (m->m_ext.ext_type) {
-		case EXT_PACKET:	/* The packet zone is special. */
-			if (*(m->m_ext.ref_cnt) == 0)
-				*(m->m_ext.ref_cnt) = 1;
-			uma_zfree(zone_pack, m);
-			return;		/* Job done. */
-		case EXT_CLUSTER:
-			uma_zfree(zone_clust, m->m_ext.ext_buf);
-			break;
-		case EXT_JUMBOP:
-			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
-			break;
-		case EXT_JUMBO9:
-			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
-			break;
-		case EXT_JUMBO16:
-			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
-			break;
-		case EXT_SFBUF:
-		case EXT_NET_DRV:
-		case EXT_MOD_TYPE:
-		case EXT_DISPOSABLE:
-			*(m->m_ext.ref_cnt) = 0;
-			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
-				m->m_ext.ref_cnt));
-			/* FALLTHROUGH */
-		case EXT_EXTREF:
-			KASSERT(m->m_ext.ext_free != NULL,
-				("%s: ext_free not set", __func__));
-			(*(m->m_ext.ext_free))(m->m_ext.ext_arg1,
-			    m->m_ext.ext_arg2);
-			break;
-		default:
-			KASSERT(m->m_ext.ext_type == 0,
-				("%s: unknown ext_type", __func__));
-		}
-	}
-	if (skipmbuf)
-		return;
-	
-	/*
-	 * Free this mbuf back to the mbuf zone with all m_ext
-	 * information purged.
+	 * Release one more reference to this mbuf.  If it is the last it
+	 * will be freed.
 	 */
-	m->m_ext.ext_buf = NULL;
-	m->m_ext.ext_free = NULL;
-	m->m_ext.ext_arg1 = NULL;
-	m->m_ext.ext_arg2 = NULL;
-	m->m_ext.ref_cnt = NULL;
-	m->m_ext.ext_size = 0;
-	m->m_ext.ext_type = 0;
-	m->m_flags &= ~M_EXT;
-	uma_zfree(zone_mbuf, m);
+	m_free(arg1);
 }
 
-/*
- * Attach the the cluster from *m to *n, set up m_ext in *n
- * and bump the refcount of the cluster.
- */
-static void
-mb_dupcl(struct mbuf *n, struct mbuf *m)
+void
+m_ext_free_nop(void *arg1, void *arg2)
 {
-	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
-	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
-	KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
 
-	if (*(m->m_ext.ref_cnt) == 1)
-		*(m->m_ext.ref_cnt) += 1;
-	else
-		atomic_add_int(m->m_ext.ref_cnt, 1);
-	n->m_ext.ext_buf = m->m_ext.ext_buf;
-	n->m_ext.ext_free = m->m_ext.ext_free;
-	n->m_ext.ext_arg1 = m->m_ext.ext_arg1;
-	n->m_ext.ext_arg2 = m->m_ext.ext_arg2;
-	n->m_ext.ext_size = m->m_ext.ext_size;
-	n->m_ext.ref_cnt = m->m_ext.ref_cnt;
-	n->m_ext.ext_type = m->m_ext.ext_type;
-	n->m_flags |= M_EXT;
+	/* Nothing to do. */
 }
 
 /*
@@ -357,11 +275,8 @@ m_sanity(struct mbuf *m0, int sanitize)
 		 * unrelated kernel memory before or after us is trashed.
 		 * No way to recover from that.
 		 */
-		a = ((m->m_flags & M_EXT) ? m->m_ext.ext_buf :
-			((m->m_flags & M_PKTHDR) ? (caddr_t)(&m->m_pktdat) :
-			 (caddr_t)(&m->m_dat)) );
-		b = (caddr_t)(a + (m->m_flags & M_EXT ? m->m_ext.ext_size :
-			((m->m_flags & M_PKTHDR) ? MHLEN : MLEN)));
+		a = M_START(m);
+		b = (caddr_t)(a + m->m_size);
 		if ((caddr_t)m->m_data < a)
 			M_SANITY_ACTION("m_data outside mbuf data range left");
 		if ((caddr_t)m->m_data > b)
@@ -532,6 +447,7 @@ m_copym(struct mbuf *m, int off0, int le
 	struct mbuf *top;
 	int copyhdr = 0;
 
+	CTR3(KTR_NET, "m_copym(%p, %d, %d)", m, off0, len);
 	KASSERT(off >= 0, ("m_copym, negative off %d", off));
 	KASSERT(len >= 0, ("m_copym, negative len %d", len));
 	MBUF_CHECKSLEEP(wait);
@@ -568,13 +484,16 @@ m_copym(struct mbuf *m, int off0, int le
 				n->m_pkthdr.len = len;
 			copyhdr = 0;
 		}
+		/*
+		 * If the copied data will fit in the space of standard
+		 * mbuf prefer to copy rather than reference.
+		 */
 		n->m_len = min(len, m->m_len - off);
-		if (m->m_flags & M_EXT) {
-			n->m_data = m->m_data + off;
-			mb_dupcl(n, m);
+		if (n->m_len > n->m_size) {
+			m_refm(n, m);
+			n->m_data += off;
 		} else
-			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
-			    (u_int)n->m_len);
+			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), n->m_len);
 		if (len != M_COPYALL)
 			len -= n->m_len;
 		off = 0;
@@ -752,7 +671,9 @@ struct mbuf *
 m_copypacket(struct mbuf *m, int how)
 {
 	struct mbuf *top, *n, *o;
+	int leading;
 
+	CTR1(KTR_NET, "m_copypacket(%p)", m);
 	MBUF_CHECKSLEEP(how);
 	MGET(n, how, m->m_type);
 	top = n;
@@ -762,13 +683,10 @@ m_copypacket(struct mbuf *m, int how)
 	if (!m_dup_pkthdr(n, m, how))
 		goto nospace;
 	n->m_len = m->m_len;
-	if (m->m_flags & M_EXT) {
-		n->m_data = m->m_data;
-		mb_dupcl(n, m);
-	} else {
-		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
-		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
-	}
+	if (n->m_len > n->m_size)
+		m_refm(n, m);
+	else
+		bcopy(mtod(m, caddr_t), mtod(n, caddr_t), m->m_len);
 
 	m = m->m_next;
 	while (m) {
@@ -780,13 +698,13 @@ m_copypacket(struct mbuf *m, int how)
 		n = n->m_next;
 
 		n->m_len = m->m_len;
-		if (m->m_flags & M_EXT) {
-			n->m_data = m->m_data;
-			mb_dupcl(n, m);
+		leading = M_LEADINGSPACE(m);
+		if (n->m_len + leading > n->m_size) {
+			m_refm(n, m);
 		} else {
-			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
+			n->m_data = M_START(n) + leading;
+			bcopy(mtod(m, caddr_t), mtod(n, caddr_t), n->m_len);
 		}
-
 		m = m->m_next;
 	}
 	return top;
@@ -805,6 +723,7 @@ m_copydata(const struct mbuf *m, int off
 {
 	u_int count;
 
+	CTR3(KTR_NET, "m_copydata(%p, %d, %d)", m, off, len);
 	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
 	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
 	while (off > 0) {
@@ -834,8 +753,9 @@ struct mbuf *
 m_dup(struct mbuf *m, int how)
 {
 	struct mbuf **p, *top = NULL;
-	int remain, moff, nsize;
+	int remain, moff;
 
+	CTR1(KTR_NET, "m_dup(%p)", m);
 	MBUF_CHECKSLEEP(how);
 	/* Sanity check */
 	if (m == NULL)
@@ -850,13 +770,10 @@ m_dup(struct mbuf *m, int how)
 		struct mbuf *n;
 
 		/* Get the next new mbuf */
-		if (remain >= MINCLSIZE) {
+		if (remain >= MINCLSIZE)
 			n = m_getcl(how, m->m_type, 0);
-			nsize = MCLBYTES;
-		} else {
+		else 
 			n = m_get(how, m->m_type);
-			nsize = MLEN;
-		}
 		if (n == NULL)
 			goto nospace;
 
@@ -865,8 +782,6 @@ m_dup(struct mbuf *m, int how)
 				m_free(n);
 				goto nospace;
 			}
-			if ((n->m_flags & M_EXT) == 0)
-				nsize = MHLEN;
 		}
 		n->m_len = 0;
 
@@ -875,8 +790,8 @@ m_dup(struct mbuf *m, int how)
 		p = &n->m_next;
 
 		/* Copy data from original mbuf(s) into new mbuf */
-		while (n->m_len < nsize && m != NULL) {
-			int chunk = min(nsize - n->m_len, m->m_len - moff);
+		while (n->m_len < n->m_size && m != NULL) {
+			int chunk = min(n->m_size - n->m_len, m->m_len - moff);
 
 			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
 			moff += chunk;
@@ -908,11 +823,13 @@ nospace:
 void
 m_cat(struct mbuf *m, struct mbuf *n)
 {
+	CTR2(KTR_NET, "m_cat(%p, %p)", m, n);
+
 	while (m->m_next)
 		m = m->m_next;
 	while (n) {
 		if (m->m_flags & M_EXT ||
-		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
+		    m->m_len + n->m_len > m->m_size) {
 			/* just join the two chains */
 			m->m_next = n;
 			return;
@@ -932,6 +849,7 @@ m_adj(struct mbuf *mp, int req_len)
 	struct mbuf *m;
 	int count;
 
+	CTR2(KTR_NET, "m_adj(%p, %d)", mp, req_len);
 	if ((m = mp) == NULL)
 		return;
 	if (len >= 0) {
@@ -1014,13 +932,13 @@ m_pullup(struct mbuf *n, int len)
 	int count;
 	int space;
 
+	CTR2(KTR_NET, "m_pullup(%p, %d)", n, len);
 	/*
 	 * If first mbuf has no cluster, and has room for len bytes
 	 * without shifting current data, pullup into it,
 	 * otherwise allocate a new mbuf to prepend to the chain.
 	 */
-	if ((n->m_flags & M_EXT) == 0 &&
-	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
+	if ((n->m_flags & M_EXT) == 0 && len < n->m_size && n->m_next) {
 		if (n->m_len >= len)
 			return (n);
 		m = n;
@@ -1036,7 +954,8 @@ m_pullup(struct mbuf *n, int len)
 		if (n->m_flags & M_PKTHDR)
 			M_MOVE_PKTHDR(m, n);
 	}
-	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
+	/* XXX M_TRAILINGSPACE without M_WRITABLE */
+	space = (M_START(m) + m->m_size) - (m->m_data + (m)->m_len);
 	do {
 		count = min(min(max(len, max_protohdr), space), n->m_len);
 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
@@ -1075,6 +994,7 @@ m_copyup(struct mbuf *n, int len, int ds
 	struct mbuf *m;
 	int count, space;
 
+	CTR2(KTR_NET, "m_copyup(%p, %d)", n, len);
 	if (len > (MHLEN - dstoff))
 		goto bad;
 	MGET(m, M_DONTWAIT, n->m_type);
@@ -1084,7 +1004,8 @@ m_copyup(struct mbuf *n, int len, int ds
 	if (n->m_flags & M_PKTHDR)
 		M_MOVE_PKTHDR(m, n);
 	m->m_data += dstoff;
-	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
+	/* XXX M_TRAILINGSPACE without M_WRITABLE */
+	space = (M_START(m) + m->m_size) - (m->m_data + (m)->m_len);
 	do {
 		count = min(min(max(len, max_protohdr), space), n->m_len);
 		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
@@ -1126,6 +1047,7 @@ m_split(struct mbuf *m0, int len0, int w
 	struct mbuf *m, *n;
 	u_int len = len0, remain;
 
+	CTR2(KTR_NET, "m_split(%p, %d)", m0, len0);
 	MBUF_CHECKSLEEP(wait);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-projects mailing list