svn commit: r268535 - in head/sys: kern sys
Adrian Chadd
adrian at freebsd.org
Fri Jul 11 19:48:41 UTC 2014
I'm so glad this finally made it in!
Thanks!
-a
On 11 July 2014 12:40, Gleb Smirnoff <glebius at freebsd.org> wrote:
> Author: glebius
> Date: Fri Jul 11 19:40:50 2014
> New Revision: 268535
> URL: http://svnweb.freebsd.org/changeset/base/268535
>
> Log:
> Improve reference counting of EXT_SFBUF pages attached to mbufs.
>
> o Do not use UMA refcount zone. The problem with this zone is that
> several refcounting words (16 on amd64) share the same cache line,
> and issueing atomic(9) updates on them creates cache line contention.
> Also, allocating and freeing them is extra CPU cycles.
> Instead, refcount the page directly via vm_page_wire() and the sfbuf
> via sf_buf_alloc(sf_buf_page(sf)) [1].
>
> o Call refcounting/freeing function for EXT_SFBUF via direct function
> call, instead of function pointer. This removes barrier for CPU
> branch predictor.
>
> o Do not cleanup the mbuf to be freed in mb_free_ext(), merely to
> satisfy assertion in mb_dtor_mbuf(). Remove the assertion from
> mb_dtor_mbuf(). Use bcopy() instead of manual assignments to
> copy m_ext in mb_dupcl().
>
> [1] This has some problems for now. Using sf_buf_alloc() merely to
> increase refcount is expensive, and is broken on sparc64. To be
> fixed.
>
> Sponsored by: Netflix
> Sponsored by: Nginx, Inc.
>
> Modified:
> head/sys/kern/kern_mbuf.c
> head/sys/kern/uipc_mbuf.c
> head/sys/kern/uipc_syscalls.c
> head/sys/sys/mbuf.h
> head/sys/sys/sf_buf.h
>
> Modified: head/sys/kern/kern_mbuf.c
> ==============================================================================
> --- head/sys/kern/kern_mbuf.c Fri Jul 11 17:31:40 2014 (r268534)
> +++ head/sys/kern/kern_mbuf.c Fri Jul 11 19:40:50 2014 (r268535)
> @@ -449,7 +449,6 @@ mb_dtor_mbuf(void *mem, int size, void *
>
> if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
> m_tag_delete_chain(m, NULL);
> - KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
> KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
> #ifdef INVARIANTS
> trash_dtor(mem, size, arg);
>
> Modified: head/sys/kern/uipc_mbuf.c
> ==============================================================================
> --- head/sys/kern/uipc_mbuf.c Fri Jul 11 17:31:40 2014 (r268534)
> +++ head/sys/kern/uipc_mbuf.c Fri Jul 11 19:40:50 2014 (r268535)
> @@ -287,19 +287,31 @@ m_extadd(struct mbuf *mb, caddr_t buf, u
> void
> mb_free_ext(struct mbuf *m)
> {
> - int skipmbuf;
> + int freembuf;
>
> - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
> - KASSERT(m->m_ext.ext_cnt != NULL, ("%s: ext_cnt not set", __func__));
> + KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
>
> /*
> - * check if the header is embedded in the cluster
> + * Check if the header is embedded in the cluster.
> */
> - skipmbuf = (m->m_flags & M_NOFREE);
> + freembuf = (m->m_flags & M_NOFREE) ? 0 : 1;
> +
> + switch (m->m_ext.ext_type) {
> + case EXT_SFBUF:
> + sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
> + break;
> + default:
> + KASSERT(m->m_ext.ext_cnt != NULL,
> + ("%s: no refcounting pointer on %p", __func__, m));
> + /*
> + * Free attached storage if this mbuf is the only
> + * reference to it.
> + */
> + if (*(m->m_ext.ext_cnt) != 1) {
> + if (atomic_fetchadd_int(m->m_ext.ext_cnt, -1) != 1)
> + break;
> + }
>
> - /* Free attached storage if this mbuf is the only reference to it. */
> - if (*(m->m_ext.ext_cnt) == 1 ||
> - atomic_fetchadd_int(m->m_ext.ext_cnt, -1) == 1) {
> switch (m->m_ext.ext_type) {
> case EXT_PACKET: /* The packet zone is special. */
> if (*(m->m_ext.ext_cnt) == 0)
> @@ -318,7 +330,6 @@ mb_free_ext(struct mbuf *m)
> case EXT_JUMBO16:
> uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
> break;
> - case EXT_SFBUF:
> case EXT_NET_DRV:
> case EXT_MOD_TYPE:
> case EXT_DISPOSABLE:
> @@ -337,23 +348,9 @@ mb_free_ext(struct mbuf *m)
> ("%s: unknown ext_type", __func__));
> }
> }
> - if (skipmbuf)
> - return;
>
> - /*
> - * Free this mbuf back to the mbuf zone with all m_ext
> - * information purged.
> - */
> - m->m_ext.ext_buf = NULL;
> - m->m_ext.ext_free = NULL;
> - m->m_ext.ext_arg1 = NULL;
> - m->m_ext.ext_arg2 = NULL;
> - m->m_ext.ext_cnt = NULL;
> - m->m_ext.ext_size = 0;
> - m->m_ext.ext_type = 0;
> - m->m_ext.ext_flags = 0;
> - m->m_flags &= ~M_EXT;
> - uma_zfree(zone_mbuf, m);
> + if (freembuf)
> + uma_zfree(zone_mbuf, m);
> }
>
> /*
> @@ -363,22 +360,24 @@ mb_free_ext(struct mbuf *m)
> static void
> mb_dupcl(struct mbuf *n, struct mbuf *m)
> {
> - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
> - KASSERT(m->m_ext.ext_cnt != NULL, ("%s: ext_cnt not set", __func__));
> - KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
>
> - if (*(m->m_ext.ext_cnt) == 1)
> - *(m->m_ext.ext_cnt) += 1;
> - else
> - atomic_add_int(m->m_ext.ext_cnt, 1);
> - n->m_ext.ext_buf = m->m_ext.ext_buf;
> - n->m_ext.ext_free = m->m_ext.ext_free;
> - n->m_ext.ext_arg1 = m->m_ext.ext_arg1;
> - n->m_ext.ext_arg2 = m->m_ext.ext_arg2;
> - n->m_ext.ext_size = m->m_ext.ext_size;
> - n->m_ext.ext_cnt = m->m_ext.ext_cnt;
> - n->m_ext.ext_type = m->m_ext.ext_type;
> - n->m_ext.ext_flags = m->m_ext.ext_flags;
> + KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
> + KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n));
> +
> + switch (m->m_ext.ext_type) {
> + case EXT_SFBUF:
> + sf_ext_ref(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
> + break;
> + default:
> + KASSERT(m->m_ext.ext_cnt != NULL,
> + ("%s: no refcounting pointer on %p", __func__, m));
> + if (*(m->m_ext.ext_cnt) == 1)
> + *(m->m_ext.ext_cnt) += 1;
> + else
> + atomic_add_int(m->m_ext.ext_cnt, 1);
> + }
> +
> + bcopy(&m->m_ext, &n->m_ext, sizeof(m->m_ext));
> n->m_flags |= M_EXT;
> n->m_flags |= m->m_flags & M_RDONLY;
> }
>
> Modified: head/sys/kern/uipc_syscalls.c
> ==============================================================================
> --- head/sys/kern/uipc_syscalls.c Fri Jul 11 17:31:40 2014 (r268534)
> +++ head/sys/kern/uipc_syscalls.c Fri Jul 11 19:40:50 2014 (r268535)
> @@ -1983,32 +1983,56 @@ filt_sfsync(struct knote *kn, long hint)
> return (ret);
> }
>
> +/*
> + * Add more references to a vm_page + sf_buf + sendfile_sync.
> + */
> +void
> +sf_ext_ref(void *arg1, void *arg2)
> +{
> + struct sf_buf *sf = arg1;
> + struct sendfile_sync *sfs = arg2;
> + vm_page_t pg = sf_buf_page(sf);
> +
> + /* XXXGL: there should be sf_buf_ref() */
> + sf_buf_alloc(sf_buf_page(sf), SFB_NOWAIT);
> +
> + vm_page_lock(pg);
> + vm_page_wire(pg);
> + vm_page_unlock(pg);
> +
> + if (sfs != NULL) {
> + mtx_lock(&sfs->mtx);
> + KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
> + sfs->count++;
> + mtx_unlock(&sfs->mtx);
> + }
> +}
>
> /*
> * Detach mapped page and release resources back to the system.
> */
> void
> -sf_buf_mext(struct mbuf *mb, void *addr, void *args)
> +sf_ext_free(void *arg1, void *arg2)
> {
> - vm_page_t m;
> - struct sendfile_sync *sfs;
> + struct sf_buf *sf = arg1;
> + struct sendfile_sync *sfs = arg2;
> + vm_page_t pg = sf_buf_page(sf);
> +
> + sf_buf_free(sf);
>
> - m = sf_buf_page(args);
> - sf_buf_free(args);
> - vm_page_lock(m);
> - vm_page_unwire(m, PQ_INACTIVE);
> + vm_page_lock(pg);
> + vm_page_unwire(pg, PQ_INACTIVE);
> /*
> * Check for the object going away on us. This can
> * happen since we don't hold a reference to it.
> * If so, we're responsible for freeing the page.
> */
> - if (m->wire_count == 0 && m->object == NULL)
> - vm_page_free(m);
> - vm_page_unlock(m);
> - if (addr != NULL) {
> - sfs = addr;
> + if (pg->wire_count == 0 && pg->object == NULL)
> + vm_page_free(pg);
> + vm_page_unlock(pg);
> +
> + if (sfs != NULL)
> sf_sync_deref(sfs);
> - }
> }
>
> /*
> @@ -2124,7 +2148,7 @@ sf_sync_alloc(uint32_t flags)
> /*
> * Take a reference to a sfsync instance.
> *
> - * This has to map 1:1 to free calls coming in via sf_buf_mext(),
> + * This has to map 1:1 to free calls coming in via sf_ext_free(),
> * so typically this will be referenced once for each mbuf allocated.
> */
> void
> @@ -3062,17 +3086,19 @@ retry_space:
> m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
> if (m0 == NULL) {
> error = (mnw ? EAGAIN : ENOBUFS);
> - sf_buf_mext(NULL, NULL, sf);
> - break;
> - }
> - if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE,
> - sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF,
> - (mnw ? M_NOWAIT : M_WAITOK)) != 0) {
> - error = (mnw ? EAGAIN : ENOBUFS);
> - sf_buf_mext(NULL, NULL, sf);
> - m_freem(m0);
> + sf_ext_free(sf, NULL);
> break;
> }
> + /*
> + * Attach EXT_SFBUF external storage.
> + */
> + m0->m_ext.ext_buf = (caddr_t )sf_buf_kva(sf);
> + m0->m_ext.ext_size = PAGE_SIZE;
> + m0->m_ext.ext_arg1 = sf;
> + m0->m_ext.ext_arg2 = sfs;
> + m0->m_ext.ext_type = EXT_SFBUF;
> + m0->m_ext.ext_flags = 0;
> + m0->m_flags |= (M_EXT|M_RDONLY);
> m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
> m0->m_len = xfsize;
>
>
> Modified: head/sys/sys/mbuf.h
> ==============================================================================
> --- head/sys/sys/mbuf.h Fri Jul 11 17:31:40 2014 (r268534)
> +++ head/sys/sys/mbuf.h Fri Jul 11 19:40:50 2014 (r268535)
> @@ -374,6 +374,12 @@ struct mbuf {
> "\30EXT_FLAG_EXP4"
>
> /*
> + * External reference/free functions.
> + */
> +void sf_ext_ref(void *, void *);
> +void sf_ext_free(void *, void *);
> +
> +/*
> * Flags indicating checksum, segmentation and other offload work to be
> * done, or already done, by hardware or lower layers. It is split into
> * separate inbound and outbound flags.
>
> Modified: head/sys/sys/sf_buf.h
> ==============================================================================
> --- head/sys/sys/sf_buf.h Fri Jul 11 17:31:40 2014 (r268534)
> +++ head/sys/sys/sf_buf.h Fri Jul 11 19:40:50 2014 (r268535)
> @@ -52,7 +52,6 @@ struct sfstat { /* sendfile statistic
> #include <machine/sf_buf.h>
> #include <sys/systm.h>
> #include <sys/counter.h>
> -struct mbuf; /* for sf_buf_mext() */
>
> extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
> #define SFSTAT_ADD(name, val) \
> @@ -60,7 +59,4 @@ extern counter_u64_t sfstat[sizeof(struc
> (val))
> #define SFSTAT_INC(name) SFSTAT_ADD(name, 1)
> #endif /* _KERNEL */
> -
> -void sf_buf_mext(struct mbuf *mb, void *addr, void *args);
> -
> #endif /* !_SYS_SF_BUF_H_ */
>
More information about the svn-src-all
mailing list