svn commit: r187681 - in head/sys: kern vm
Jeff Roberson
jroberson at jroberson.net
Sun Jan 25 01:15:43 PST 2009
On Sun, 25 Jan 2009, Jeff Roberson wrote:
> Author: jeff
> Date: Sun Jan 25 09:11:24 2009
> New Revision: 187681
> URL: http://svn.freebsd.org/changeset/base/187681
>
> Log:
> - Make the keg abstraction more complete. Permit a zone to have multiple
> backend kegs so it may source compatible memory from multiple backends.
> This is useful for cases such as NUMA or different layouts for the same
> memory type.
> - Provide a new api for adding new backend kegs to secondary zones.
> - Provide a new flag for adjusting the layout of zones to stagger
> allocations better across cache lines.
There are currently no in-tree users for the new functionality provided by
this diff. Our network stack has other bottlenecks in play before
memory layout optimizations are helpful. However, I think anyone who has
looked at UMA internals as they have evolved over the last 7 years will
appreciate the refactoring.
Thanks,
Jeff
>
> Sponsored by: Nokia
>
> Modified:
> head/sys/kern/kern_malloc.c
> head/sys/vm/uma.h
> head/sys/vm/uma_core.c
> head/sys/vm/uma_dbg.c
> head/sys/vm/uma_int.h
>
> Modified: head/sys/kern/kern_malloc.c
> ==============================================================================
> --- head/sys/kern/kern_malloc.c Sun Jan 25 08:27:11 2009 (r187680)
> +++ head/sys/kern/kern_malloc.c Sun Jan 25 09:11:24 2009 (r187681)
> @@ -329,7 +329,6 @@ malloc(unsigned long size, struct malloc
> int indx;
> caddr_t va;
> uma_zone_t zone;
> - uma_keg_t keg;
> #if defined(DIAGNOSTIC) || defined(DEBUG_REDZONE)
> unsigned long osize = size;
> #endif
> @@ -378,18 +377,16 @@ malloc(unsigned long size, struct malloc
> size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
> indx = kmemsize[size >> KMEM_ZSHIFT];
> zone = kmemzones[indx].kz_zone;
> - keg = zone->uz_keg;
> #ifdef MALLOC_PROFILE
> krequests[size >> KMEM_ZSHIFT]++;
> #endif
> va = uma_zalloc(zone, flags);
> if (va != NULL)
> - size = keg->uk_size;
> + size = zone->uz_size;
> malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
> } else {
> size = roundup(size, PAGE_SIZE);
> zone = NULL;
> - keg = NULL;
> va = uma_large_malloc(size, flags);
> malloc_type_allocated(mtp, va == NULL ? 0 : size);
> }
>
> Modified: head/sys/vm/uma.h
> ==============================================================================
> --- head/sys/vm/uma.h Sun Jan 25 08:27:11 2009 (r187680)
> +++ head/sys/vm/uma.h Sun Jan 25 09:11:24 2009 (r187681)
> @@ -205,6 +205,17 @@ uma_zone_t uma_zsecond_create(char *name
> uma_init zinit, uma_fini zfini, uma_zone_t master);
>
> /*
> + * Add a second master to a secondary zone. This provides multiple data
> + * backends for objects with the same size. Both masters must have
> + * compatible allocation flags. Presently, UMA_ZONE_MALLOC type zones are
> + * the only type supported.
> + *
> + * Returns:
> + * Error on failure, 0 on success.
> + */
> +int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
> +
> +/*
> * Definitions for uma_zcreate flags
> *
> * These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to
> @@ -230,6 +241,22 @@ uma_zone_t uma_zsecond_create(char *name
> #define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */
> #define UMA_ZONE_REFCNT 0x0400 /* Allocate refcnts in slabs */
> #define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */
> +#define UMA_ZONE_CACHESPREAD 0x1000 /*
> + * Spread memory start locations across
> + * all possible cache lines. May
> + * require many virtually contiguous
> + * backend pages and can fail early.
> + */
> +#define UMA_ZONE_VTOSLAB 0x2000 /* Zone uses vtoslab for lookup. */
> +
> +/*
> + * These flags are shared between the keg and zone. In zones wishing to add
> + * new kegs these flags must be compatible. Some are determined based on
> + * physical parameters of the request and may not be provided by the consumer.
> + */
> +#define UMA_ZONE_INHERIT \
> + (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_HASH | \
> + UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
>
> /* Definitions for align */
> #define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */
>
> Modified: head/sys/vm/uma_core.c
> ==============================================================================
> --- head/sys/vm/uma_core.c Sun Jan 25 08:27:11 2009 (r187680)
> +++ head/sys/vm/uma_core.c Sun Jan 25 09:11:24 2009 (r187681)
> @@ -1,5 +1,5 @@
> /*-
> - * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff at FreeBSD.org>
> + * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff at FreeBSD.org>
> * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic at FreeBSD.org>
> * Copyright (c) 2004-2006 Robert N. M. Watson
> * All rights reserved.
> @@ -112,7 +112,7 @@ static uma_zone_t slabrefzone; /* With r
> static uma_zone_t hashzone;
>
> /* The boot-time adjusted value for cache line alignment. */
> -static int uma_align_cache = 16 - 1;
> +static int uma_align_cache = 64 - 1;
>
> static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
>
> @@ -212,7 +212,7 @@ static void *obj_alloc(uma_zone_t, int,
> static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
> static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
> static void page_free(void *, int, u_int8_t);
> -static uma_slab_t slab_zalloc(uma_zone_t, int);
> +static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
> static void cache_drain(uma_zone_t);
> static void bucket_drain(uma_zone_t, uma_bucket_t);
> static void bucket_cache_drain(uma_zone_t zone);
> @@ -221,8 +221,8 @@ static void keg_dtor(void *, int, void *
> static int zone_ctor(void *, int, void *, int);
> static void zone_dtor(void *, int, void *);
> static int zero_init(void *, int, int);
> -static void zone_small_init(uma_zone_t zone);
> -static void zone_large_init(uma_zone_t zone);
> +static void keg_small_init(uma_keg_t keg);
> +static void keg_large_init(uma_keg_t keg);
> static void zone_foreach(void (*zfunc)(uma_zone_t));
> static void zone_timeout(uma_zone_t zone);
> static int hash_alloc(struct uma_hash *);
> @@ -230,19 +230,22 @@ static int hash_expand(struct uma_hash *
> static void hash_free(struct uma_hash *hash);
> static void uma_timeout(void *);
> static void uma_startup3(void);
> -static void *uma_zalloc_internal(uma_zone_t, void *, int);
> -static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
> +static void *zone_alloc_item(uma_zone_t, void *, int);
> +static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip,
> int);
> static void bucket_enable(void);
> static void bucket_init(void);
> static uma_bucket_t bucket_alloc(int, int);
> static void bucket_free(uma_bucket_t);
> static void bucket_zone_drain(void);
> -static int uma_zalloc_bucket(uma_zone_t zone, int flags);
> -static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
> -static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
> -static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
> +static int zone_alloc_bucket(uma_zone_t zone, int flags);
> +static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
> +static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
> +static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
> +static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
> uma_fini fini, int align, u_int32_t flags);
> +static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
> +static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
>
> void uma_print_zone(uma_zone_t);
> void uma_print_stats(void);
> @@ -291,7 +294,8 @@ bucket_init(void)
> size = roundup(sizeof(struct uma_bucket), sizeof(void *));
> size += sizeof(void *) * ubz->ubz_entries;
> ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
> - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
> + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
> + UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET);
> for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
> bucket_size[i >> BUCKET_SHIFT] = j;
> }
> @@ -326,7 +330,7 @@ bucket_alloc(int entries, int bflags)
> return (NULL);
>
> ubz = bucket_zone_lookup(entries);
> - bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
> + bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
> if (bucket) {
> #ifdef INVARIANTS
> bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
> @@ -344,7 +348,7 @@ bucket_free(uma_bucket_t bucket)
> struct uma_bucket_zone *ubz;
>
> ubz = bucket_zone_lookup(bucket->ub_entries);
> - uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
> + zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
> ZFREE_STATFREE);
> }
>
> @@ -357,6 +361,21 @@ bucket_zone_drain(void)
> zone_drain(ubz->ubz_zone);
> }
>
> +static inline uma_keg_t
> +zone_first_keg(uma_zone_t zone)
> +{
> +
> + return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
> +}
> +
> +static void
> +zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
> +{
> + uma_klink_t klink;
> +
> + LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
> + kegfn(klink->kl_keg);
> +}
>
> /*
> * Routine called by timeout which is used to fire off some time interval
> @@ -382,29 +401,20 @@ uma_timeout(void *unused)
> * Routine to perform timeout driven calculations. This expands the
> * hashes and does per cpu statistics aggregation.
> *
> - * Arguments:
> - * zone The zone to operate on
> - *
> - * Returns:
> - * Nothing
> + * Returns nothing.
> */
> static void
> -zone_timeout(uma_zone_t zone)
> +keg_timeout(uma_keg_t keg)
> {
> - uma_keg_t keg;
> - u_int64_t alloc;
> -
> - keg = zone->uz_keg;
> - alloc = 0;
>
> + KEG_LOCK(keg);
> /*
> - * Expand the zone hash table.
> + * Expand the keg hash table.
> *
> * This is done if the number of slabs is larger than the hash size.
> * What I'm trying to do here is completely reduce collisions. This
> * may be a little aggressive. Should I allow for two collisions max?
> */
> - ZONE_LOCK(zone);
> if (keg->uk_flags & UMA_ZONE_HASH &&
> keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
> struct uma_hash newhash;
> @@ -413,14 +423,14 @@ zone_timeout(uma_zone_t zone)
>
> /*
> * This is so involved because allocating and freeing
> - * while the zone lock is held will lead to deadlock.
> + * while the keg lock is held will lead to deadlock.
> * I have to do everything in stages and check for
> * races.
> */
> newhash = keg->uk_hash;
> - ZONE_UNLOCK(zone);
> + KEG_UNLOCK(keg);
> ret = hash_alloc(&newhash);
> - ZONE_LOCK(zone);
> + KEG_LOCK(keg);
> if (ret) {
> if (hash_expand(&keg->uk_hash, &newhash)) {
> oldhash = keg->uk_hash;
> @@ -428,12 +438,19 @@ zone_timeout(uma_zone_t zone)
> } else
> oldhash = newhash;
>
> - ZONE_UNLOCK(zone);
> + KEG_UNLOCK(keg);
> hash_free(&oldhash);
> - ZONE_LOCK(zone);
> + KEG_LOCK(keg);
> }
> }
> - ZONE_UNLOCK(zone);
> + KEG_UNLOCK(keg);
> +}
> +
> +static void
> +zone_timeout(uma_zone_t zone)
> +{
> +
> + zone_foreach_keg(zone, &keg_timeout);
> }
>
> /*
> @@ -462,7 +479,7 @@ hash_alloc(struct uma_hash *hash)
> M_UMAHASH, M_NOWAIT);
> } else {
> alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
> - hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
> + hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
> M_WAITOK);
> hash->uh_hashsize = UMA_HASH_SIZE_INIT;
> }
> @@ -535,7 +552,7 @@ hash_free(struct uma_hash *hash)
> if (hash->uh_slab_hash == NULL)
> return;
> if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
> - uma_zfree_internal(hashzone,
> + zone_free_item(hashzone,
> hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
> else
> free(hash->uh_slab_hash, M_UMAHASH);
> @@ -555,20 +572,11 @@ hash_free(struct uma_hash *hash)
> static void
> bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
> {
> - uma_slab_t slab;
> - int mzone;
> void *item;
>
> if (bucket == NULL)
> return;
>
> - slab = NULL;
> - mzone = 0;
> -
> - /* We have to lookup the slab again for malloc.. */
> - if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
> - mzone = 1;
> -
> while (bucket->ub_cnt > 0) {
> bucket->ub_cnt--;
> item = bucket->ub_bucket[bucket->ub_cnt];
> @@ -577,15 +585,7 @@ bucket_drain(uma_zone_t zone, uma_bucket
> KASSERT(item != NULL,
> ("bucket_drain: botched ptr, item is NULL"));
> #endif
> - /*
> - * This is extremely inefficient. The slab pointer was passed
> - * to uma_zfree_arg, but we lost it because the buckets don't
> - * hold them. This will go away when free() gets a size passed
> - * to it.
> - */
> - if (mzone)
> - slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
> - uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
> + zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
> }
> }
>
> @@ -665,42 +665,32 @@ bucket_cache_drain(uma_zone_t zone)
> }
>
> /*
> - * Frees pages from a zone back to the system. This is done on demand from
> + * Frees pages from a keg back to the system. This is done on demand from
> * the pageout daemon.
> *
> - * Arguments:
> - * zone The zone to free pages from
> - * all Should we drain all items?
> - *
> - * Returns:
> - * Nothing.
> + * Returns nothing.
> */
> -void
> -zone_drain(uma_zone_t zone)
> +static void
> +keg_drain(uma_keg_t keg)
> {
> struct slabhead freeslabs = { 0 };
> - uma_keg_t keg;
> uma_slab_t slab;
> uma_slab_t n;
> u_int8_t flags;
> u_int8_t *mem;
> int i;
>
> - keg = zone->uz_keg;
> -
> /*
> - * We don't want to take pages from statically allocated zones at this
> + * We don't want to take pages from statically allocated kegs at this
> * time
> */
> if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
> return;
>
> - ZONE_LOCK(zone);
> -
> #ifdef UMA_DEBUG
> - printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
> + printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
> #endif
> - bucket_cache_drain(zone);
> + KEG_LOCK(keg);
> if (keg->uk_free == 0)
> goto finished;
>
> @@ -726,7 +716,7 @@ zone_drain(uma_zone_t zone)
> slab = n;
> }
> finished:
> - ZONE_UNLOCK(zone);
> + KEG_UNLOCK(keg);
>
> while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
> SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
> @@ -738,8 +728,7 @@ finished:
> flags = slab->us_flags;
> mem = slab->us_data;
>
> - if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
> - (keg->uk_flags & UMA_ZONE_REFCNT)) {
> + if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
> vm_object_t obj;
>
> if (flags & UMA_SLAB_KMEM)
> @@ -753,21 +742,61 @@ finished:
> obj);
> }
> if (keg->uk_flags & UMA_ZONE_OFFPAGE)
> - uma_zfree_internal(keg->uk_slabzone, slab, NULL,
> + zone_free_item(keg->uk_slabzone, slab, NULL,
> SKIP_NONE, ZFREE_STATFREE);
> #ifdef UMA_DEBUG
> printf("%s: Returning %d bytes.\n",
> - zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
> + keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
> #endif
> keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
> }
> }
>
> +static void
> +zone_drain_wait(uma_zone_t zone, int waitok)
> +{
> +
> + /*
> + * Set draining to interlock with zone_dtor() so we can release our
> + * locks as we go. Only dtor() should do a WAITOK call since it
> + * is the only call that knows the structure will still be available
> + * when it wakes up.
> + */
> + ZONE_LOCK(zone);
> + while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
> + if (waitok == M_NOWAIT)
> + goto out;
> + mtx_unlock(&uma_mtx);
> + msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
> + mtx_lock(&uma_mtx);
> + }
> + zone->uz_flags |= UMA_ZFLAG_DRAINING;
> + bucket_cache_drain(zone);
> + ZONE_UNLOCK(zone);
> + /*
> + * The DRAINING flag protects us from being freed while
> + * we're running. Normally the uma_mtx would protect us but we
> + * must be able to release and acquire the right lock for each keg.
> + */
> + zone_foreach_keg(zone, &keg_drain);
> + ZONE_LOCK(zone);
> + zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
> + wakeup(zone);
> +out:
> + ZONE_UNLOCK(zone);
> +}
> +
> +void
> +zone_drain(uma_zone_t zone)
> +{
> +
> + zone_drain_wait(zone, M_NOWAIT);
> +}
> +
> /*
> - * Allocate a new slab for a zone. This does not insert the slab onto a list.
> + * Allocate a new slab for a keg. This does not insert the slab onto a list.
> *
> * Arguments:
> - * zone The zone to allocate slabs for
> * wait Shall we wait?
> *
> * Returns:
> @@ -775,27 +804,28 @@ finished:
> * caller specified M_NOWAIT.
> */
> static uma_slab_t
> -slab_zalloc(uma_zone_t zone, int wait)
> +keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
> {
> uma_slabrefcnt_t slabref;
> + uma_alloc allocf;
> uma_slab_t slab;
> - uma_keg_t keg;
> u_int8_t *mem;
> u_int8_t flags;
> int i;
>
> + mtx_assert(&keg->uk_lock, MA_OWNED);
> slab = NULL;
> - keg = zone->uz_keg;
>
> #ifdef UMA_DEBUG
> - printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
> + printf("slab_zalloc: Allocating a new slab for %s\n", keg->uk_name);
> #endif
> - ZONE_UNLOCK(zone);
> + allocf = keg->uk_allocf;
> + KEG_UNLOCK(keg);
>
> if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
> - slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
> + slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
> if (slab == NULL) {
> - ZONE_LOCK(zone);
> + KEG_LOCK(keg);
> return NULL;
> }
> }
> @@ -812,13 +842,13 @@ slab_zalloc(uma_zone_t zone, int wait)
> else
> wait &= ~M_ZERO;
>
> - mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
> - &flags, wait);
> + /* zone is passed for legacy reasons. */
> + mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
> if (mem == NULL) {
> if (keg->uk_flags & UMA_ZONE_OFFPAGE)
> - uma_zfree_internal(keg->uk_slabzone, slab, NULL,
> + zone_free_item(keg->uk_slabzone, slab, NULL,
> SKIP_NONE, ZFREE_STATFREE);
> - ZONE_LOCK(zone);
> + KEG_LOCK(keg);
> return (NULL);
> }
>
> @@ -826,8 +856,7 @@ slab_zalloc(uma_zone_t zone, int wait)
> if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
> slab = (uma_slab_t )(mem + keg->uk_pgoff);
>
> - if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
> - (keg->uk_flags & UMA_ZONE_REFCNT))
> + if (keg->uk_flags & UMA_ZONE_VTOSLAB)
> for (i = 0; i < keg->uk_ppera; i++)
> vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
>
> @@ -860,8 +889,7 @@ slab_zalloc(uma_zone_t zone, int wait)
> (keg->uk_rsize * i),
> keg->uk_size);
> }
> - if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
> - (keg->uk_flags & UMA_ZONE_REFCNT)) {
> + if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
> vm_object_t obj;
>
> if (flags & UMA_SLAB_KMEM)
> @@ -875,15 +903,15 @@ slab_zalloc(uma_zone_t zone, int wait)
> (i * PAGE_SIZE), obj);
> }
> if (keg->uk_flags & UMA_ZONE_OFFPAGE)
> - uma_zfree_internal(keg->uk_slabzone, slab,
> + zone_free_item(keg->uk_slabzone, slab,
> NULL, SKIP_NONE, ZFREE_STATFREE);
> keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
> flags);
> - ZONE_LOCK(zone);
> + KEG_LOCK(keg);
> return (NULL);
> }
> }
> - ZONE_LOCK(zone);
> + KEG_LOCK(keg);
>
> if (keg->uk_flags & UMA_ZONE_HASH)
> UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
> @@ -905,7 +933,7 @@ startup_alloc(uma_zone_t zone, int bytes
> uma_keg_t keg;
> uma_slab_t tmps;
>
> - keg = zone->uz_keg;
> + keg = zone_first_keg(zone);
>
> /*
> * Check our small startup cache to see if it has pages remaining.
> @@ -935,7 +963,6 @@ startup_alloc(uma_zone_t zone, int bytes
> * Allocates a number of pages from the system
> *
> * Arguments:
> - * zone Unused
> * bytes The number of bytes requested
> * wait Shall we wait?
> *
> @@ -958,7 +985,6 @@ page_alloc(uma_zone_t zone, int bytes, u
> * Allocates a number of pages from within an object
> *
> * Arguments:
> - * zone Unused
> * bytes The number of bytes requested
> * wait Shall we wait?
> *
> @@ -973,8 +999,10 @@ obj_alloc(uma_zone_t zone, int bytes, u_
> vm_offset_t retkva, zkva;
> vm_page_t p;
> int pages, startpages;
> + uma_keg_t keg;
>
> - object = zone->uz_keg->uk_obj;
> + keg = zone_first_keg(zone);
> + object = keg->uk_obj;
> retkva = 0;
>
> /*
> @@ -984,7 +1012,7 @@ obj_alloc(uma_zone_t zone, int bytes, u_
> p = TAILQ_LAST(&object->memq, pglist);
> pages = p != NULL ? p->pindex + 1 : 0;
> startpages = pages;
> - zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
> + zkva = keg->uk_kva + pages * PAGE_SIZE;
> for (; bytes > 0; bytes -= PAGE_SIZE) {
> p = vm_page_alloc(object, pages,
> VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
> @@ -1052,25 +1080,23 @@ zero_init(void *mem, int size, int flags
> }
>
> /*
> - * Finish creating a small uma zone. This calculates ipers, and the zone size.
> + * Finish creating a small uma keg. This calculates ipers, and the keg size.
> *
> * Arguments
> - * zone The zone we should initialize
> + * keg The zone we should initialize
> *
> * Returns
> * Nothing
> */
> static void
> -zone_small_init(uma_zone_t zone)
> +keg_small_init(uma_keg_t keg)
> {
> - uma_keg_t keg;
> u_int rsize;
> u_int memused;
> u_int wastedspace;
> u_int shsize;
>
> - keg = zone->uz_keg;
> - KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
> + KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
> rsize = keg->uk_size;
>
> if (rsize < UMA_SMALLEST_UNIT)
> @@ -1090,7 +1116,7 @@ zone_small_init(uma_zone_t zone)
> }
>
> keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
> - KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
> + KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
> memused = keg->uk_ipers * rsize + shsize;
> wastedspace = UMA_SLAB_SIZE - memused;
>
> @@ -1109,44 +1135,41 @@ zone_small_init(uma_zone_t zone)
> (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
> keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
> KASSERT(keg->uk_ipers <= 255,
> - ("zone_small_init: keg->uk_ipers too high!"));
> + ("keg_small_init: keg->uk_ipers too high!"));
> #ifdef UMA_DEBUG
> printf("UMA decided we need offpage slab headers for "
> - "zone: %s, calculated wastedspace = %d, "
> + "keg: %s, calculated wastedspace = %d, "
> "maximum wasted space allowed = %d, "
> "calculated ipers = %d, "
> - "new wasted space = %d\n", zone->uz_name, wastedspace,
> + "new wasted space = %d\n", keg->uk_name, wastedspace,
> UMA_MAX_WASTE, keg->uk_ipers,
> UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
> #endif
> keg->uk_flags |= UMA_ZONE_OFFPAGE;
> - if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
> + if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
> keg->uk_flags |= UMA_ZONE_HASH;
> }
> }
>
> /*
> - * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
> + * Finish creating a large (> UMA_SLAB_SIZE) uma keg. Just give in and do
> * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
> * more complicated.
> *
> * Arguments
> - * zone The zone we should initialize
> + * keg The keg we should initialize
> *
> * Returns
> * Nothing
> */
> static void
> -zone_large_init(uma_zone_t zone)
> +keg_large_init(uma_keg_t keg)
> {
> - uma_keg_t keg;
> int pages;
>
> - keg = zone->uz_keg;
> -
> - KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
> + KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
> KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
> - ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
> + ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
>
> pages = keg->uk_size / UMA_SLAB_SIZE;
>
> @@ -1158,12 +1181,44 @@ zone_large_init(uma_zone_t zone)
> keg->uk_ipers = 1;
>
> keg->uk_flags |= UMA_ZONE_OFFPAGE;
> - if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
> + if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
> keg->uk_flags |= UMA_ZONE_HASH;
>
> keg->uk_rsize = keg->uk_size;
> }
>
> +static void
> +keg_cachespread_init(uma_keg_t keg)
> +{
> + int alignsize;
> + int trailer;
> + int pages;
> + int rsize;
> +
> + alignsize = keg->uk_align + 1;
> + rsize = keg->uk_size;
> + /*
> + * We want one item to start on every align boundary in a page. To
> + * do this we will span pages. We will also extend the item by the
> + * size of align if it is an even multiple of align. Otherwise, it
> + * would fall on the same boundary every time.
> + */
> + if (rsize & keg->uk_align)
> + rsize = (rsize & ~keg->uk_align) + alignsize;
> + if ((rsize & alignsize) == 0)
> + rsize += alignsize;
> + trailer = rsize - keg->uk_size;
> + pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
> + pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
> + keg->uk_rsize = rsize;
> + keg->uk_ppera = pages;
> + keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
> + keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
> + KASSERT(keg->uk_ipers <= uma_max_ipers,
> + ("keg_small_init: keg->uk_ipers too high(%d) increase max_ipers",
> + keg->uk_ipers));
> +}
> +
> /*
> * Keg header ctor. This initializes all fields, locks, etc. And inserts
> * the keg onto the global keg list.
> @@ -1195,7 +1250,7 @@ keg_ctor(void *mem, int size, void *udat
> * The master zone is passed to us at keg-creation time.
> */
> zone = arg->zone;
> - zone->uz_keg = keg;
> + keg->uk_name = zone->uz_name;
>
> if (arg->flags & UMA_ZONE_VM)
> keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
> @@ -1203,24 +1258,31 @@ keg_ctor(void *mem, int size, void *udat
> if (arg->flags & UMA_ZONE_ZINIT)
> keg->uk_init = zero_init;
>
> + if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
> + keg->uk_flags |= UMA_ZONE_VTOSLAB;
> +
> /*
> * The +UMA_FRITM_SZ added to uk_size is to account for the
> - * linkage that is added to the size in zone_small_init(). If
> + * linkage that is added to the size in keg_small_init(). If
> * we don't account for this here then we may end up in
> - * zone_small_init() with a calculated 'ipers' of 0.
> + * keg_small_init() with a calculated 'ipers' of 0.
> */
> if (keg->uk_flags & UMA_ZONE_REFCNT) {
> - if ((keg->uk_size+UMA_FRITMREF_SZ) >
> + if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
> + keg_cachespread_init(keg);
> + else if ((keg->uk_size+UMA_FRITMREF_SZ) >
> (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
> - zone_large_init(zone);
> + keg_large_init(keg);
> else
> - zone_small_init(zone);
> + keg_small_init(keg);
> } else {
> - if ((keg->uk_size+UMA_FRITM_SZ) >
> + if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
> + keg_cachespread_init(keg);
> + else if ((keg->uk_size+UMA_FRITM_SZ) >
> (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
> - zone_large_init(zone);
> + keg_large_init(keg);
> else
> - zone_small_init(zone);
> + keg_small_init(keg);
> }
>
> if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
> @@ -1244,14 +1306,12 @@ keg_ctor(void *mem, int size, void *udat
> }
>
> /*
> - * Initialize keg's lock (shared among zones) through
> - * Master zone
> + * Initialize keg's lock (shared among zones).
> */
> - zone->uz_lock = &keg->uk_lock;
> if (arg->flags & UMA_ZONE_MTXCLASS)
> - ZONE_LOCK_INIT(zone, 1);
> + KEG_LOCK_INIT(keg, 1);
> else
> - ZONE_LOCK_INIT(zone, 0);
> + KEG_LOCK_INIT(keg, 0);
>
> /*
> * If we're putting the slab header in the actual page we need to
> @@ -1300,10 +1360,10 @@ keg_ctor(void *mem, int size, void *udat
> hash_alloc(&keg->uk_hash);
>
> #ifdef UMA_DEBUG
> - printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
> - zone->uz_name, zone,
> - keg->uk_size, keg->uk_ipers,
> - keg->uk_ppera, keg->uk_pgoff);
> + printf("UMA: %s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
> + zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
> + keg->uk_ipers, keg->uk_ppera,
> + (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
> #endif
>
> LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
> @@ -1320,7 +1380,6 @@ keg_ctor(void *mem, int size, void *udat
> * Arguments/Returns follow uma_ctor specifications
> * udata Actually uma_zctor_args
> */
> -
> static int
> zone_ctor(void *mem, int size, void *udata, int flags)
> {
> @@ -1333,23 +1392,24 @@ zone_ctor(void *mem, int size, void *uda
> zone->uz_name = arg->name;
> zone->uz_ctor = arg->ctor;
> zone->uz_dtor = arg->dtor;
> + zone->uz_slab = zone_fetch_slab;
> zone->uz_init = NULL;
> zone->uz_fini = NULL;
> zone->uz_allocs = 0;
> zone->uz_frees = 0;
> zone->uz_fails = 0;
> zone->uz_fills = zone->uz_count = 0;
> + zone->uz_flags = 0;
> + keg = arg->keg;
>
> if (arg->flags & UMA_ZONE_SECONDARY) {
> KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
> - keg = arg->keg;
> - zone->uz_keg = keg;
> zone->uz_init = arg->uminit;
> zone->uz_fini = arg->fini;
> zone->uz_lock = &keg->uk_lock;
> + zone->uz_flags |= UMA_ZONE_SECONDARY;
> mtx_lock(&uma_mtx);
> ZONE_LOCK(zone);
> - keg->uk_flags |= UMA_ZONE_SECONDARY;
> LIST_FOREACH(z, &keg->uk_zones, uz_link) {
> if (LIST_NEXT(z, uz_link) == NULL) {
> LIST_INSERT_AFTER(z, zone, uz_link);
> @@ -1358,9 +1418,9 @@ zone_ctor(void *mem, int size, void *uda
> }
> ZONE_UNLOCK(zone);
> mtx_unlock(&uma_mtx);
> - } else if (arg->keg == NULL) {
> - if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
> - arg->align, arg->flags) == NULL)
> + } else if (keg == NULL) {
> + if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
> + arg->align, arg->flags)) == NULL)
> return (ENOMEM);
> } else {
> struct uma_kctor_args karg;
> @@ -1378,15 +1438,22 @@ zone_ctor(void *mem, int size, void *uda
> if (error)
> return (error);
> }
> - keg = zone->uz_keg;
> + /*
> + * Link in the first keg.
> + */
> + zone->uz_klink.kl_keg = keg;
> + LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
> zone->uz_lock = &keg->uk_lock;
> + zone->uz_size = keg->uk_size;
> + zone->uz_flags |= (keg->uk_flags &
> + (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
>
> /*
> * Some internal zones don't have room allocated for the per cpu
> * caches. If we're internal, bail out here.
> */
> if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
> - KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
> + KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
> ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
> return (0);
> }
> @@ -1413,18 +1480,17 @@ keg_dtor(void *arg, int size, void *udat
> uma_keg_t keg;
>
> keg = (uma_keg_t)arg;
> - mtx_lock(&keg->uk_lock);
> + KEG_LOCK(keg);
> if (keg->uk_free != 0) {
> printf("Freed UMA keg was not empty (%d items). "
> " Lost %d pages of memory.\n",
> keg->uk_free, keg->uk_pages);
> }
> - mtx_unlock(&keg->uk_lock);
> + KEG_UNLOCK(keg);
>
> - if (keg->uk_flags & UMA_ZONE_HASH)
> - hash_free(&keg->uk_hash);
> + hash_free(&keg->uk_hash);
>
> - mtx_destroy(&keg->uk_lock);
> + KEG_LOCK_FINI(keg);
> }
>
> /*
> @@ -1436,38 +1502,46 @@ keg_dtor(void *arg, int size, void *udat
> static void
> zone_dtor(void *arg, int size, void *udata)
> {
> + uma_klink_t klink;
> uma_zone_t zone;
> uma_keg_t keg;
>
> zone = (uma_zone_t)arg;
> - keg = zone->uz_keg;
> + keg = zone_first_keg(zone);
>
> - if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
> + if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
> cache_drain(zone);
>
> mtx_lock(&uma_mtx);
> - zone_drain(zone);
> - if (keg->uk_flags & UMA_ZONE_SECONDARY) {
> - LIST_REMOVE(zone, uz_link);
> - /*
> - * XXX there are some races here where
> - * the zone can be drained but zone lock
> - * released and then refilled before we
> - * remove it... we dont care for now
> - */
> - ZONE_LOCK(zone);
> - if (LIST_EMPTY(&keg->uk_zones))
> - keg->uk_flags &= ~UMA_ZONE_SECONDARY;
> - ZONE_UNLOCK(zone);
> - mtx_unlock(&uma_mtx);
> - } else {
> + LIST_REMOVE(zone, uz_link);
> + mtx_unlock(&uma_mtx);
> + /*
> + * XXX there are some races here where
> + * the zone can be drained but zone lock
> + * released and then refilled before we
> + * remove it... we dont care for now
> + */
> + zone_drain_wait(zone, M_WAITOK);
> + /*
> + * Unlink all of our kegs.
> + */
> + while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
> + klink->kl_keg = NULL;
> + LIST_REMOVE(klink, kl_link);
> + if (klink == &zone->uz_klink)
> + continue;
> + free(klink, M_TEMP);
> + }
> + /*
> + * We only destroy kegs from non secondary zones.
> + */
> + if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
> + mtx_lock(&uma_mtx);
> LIST_REMOVE(keg, uk_link);
> - LIST_REMOVE(zone, uz_link);
> mtx_unlock(&uma_mtx);
> - uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
> + zone_free_item(kegs, keg, NULL, SKIP_NONE,
> ZFREE_STATFREE);
> }
> - zone->uz_keg = NULL;
> }
>
> /*
> @@ -1517,7 +1591,7 @@ uma_startup(void *bootmem, int boot_page
> * (UMA_MAX_WASTE).
> *
> * We iterate until we find an object size for
> - * which the calculated wastage in zone_small_init() will be
> + * which the calculated wastage in keg_small_init() will be
> * enough to warrant OFFPAGE. Since wastedspace versus objsize
> * is an overall increasing see-saw function, we find the smallest
> * objsize such that the wastage is always acceptable for objects
> @@ -1525,7 +1599,7 @@ uma_startup(void *bootmem, int boot_page
> * generates a larger possible uma_max_ipers, we use this computed
> * objsize to calculate the largest ipers possible. Since the
> * ipers calculated for OFFPAGE slab headers is always larger than
> - * the ipers initially calculated in zone_small_init(), we use
> + * the ipers initially calculated in keg_small_init(), we use
> * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
> * obtain the maximum ipers possible for offpage slab headers.
> *
> @@ -1557,7 +1631,7 @@ uma_startup(void *bootmem, int boot_page
> }
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
>
More information about the svn-src-all
mailing list