svn commit: r187681 - in head/sys: kern vm

Jeff Roberson jroberson at jroberson.net
Sun Jan 25 01:15:43 PST 2009


On Sun, 25 Jan 2009, Jeff Roberson wrote:

> Author: jeff
> Date: Sun Jan 25 09:11:24 2009
> New Revision: 187681
> URL: http://svn.freebsd.org/changeset/base/187681
>
> Log:
>   - Make the keg abstraction more complete.  Permit a zone to have multiple
>     backend kegs so it may source compatible memory from multiple backends.
>     This is useful for cases such as NUMA or different layouts for the same
>     memory type.
>   - Provide a new api for adding new backend kegs to secondary zones.
>   - Provide a new flag for adjusting the layout of zones to stagger
>     allocations better across cache lines.

There are currently no in-tree users of the new functionality provided by 
this diff.  Our network stack has other bottlenecks that need to be 
addressed before memory layout optimizations become helpful.  However, I 
think anyone who has followed UMA internals as they have evolved over the 
last 7 years will appreciate the refactoring.

Thanks,
Jeff

>
>  Sponsored by:	Nokia
>
> Modified:
>  head/sys/kern/kern_malloc.c
>  head/sys/vm/uma.h
>  head/sys/vm/uma_core.c
>  head/sys/vm/uma_dbg.c
>  head/sys/vm/uma_int.h
>
> Modified: head/sys/kern/kern_malloc.c
> ==============================================================================
> --- head/sys/kern/kern_malloc.c	Sun Jan 25 08:27:11 2009	(r187680)
> +++ head/sys/kern/kern_malloc.c	Sun Jan 25 09:11:24 2009	(r187681)
> @@ -329,7 +329,6 @@ malloc(unsigned long size, struct malloc
> 	int indx;
> 	caddr_t va;
> 	uma_zone_t zone;
> -	uma_keg_t keg;
> #if defined(DIAGNOSTIC) || defined(DEBUG_REDZONE)
> 	unsigned long osize = size;
> #endif
> @@ -378,18 +377,16 @@ malloc(unsigned long size, struct malloc
> 			size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
> 		indx = kmemsize[size >> KMEM_ZSHIFT];
> 		zone = kmemzones[indx].kz_zone;
> -		keg = zone->uz_keg;
> #ifdef MALLOC_PROFILE
> 		krequests[size >> KMEM_ZSHIFT]++;
> #endif
> 		va = uma_zalloc(zone, flags);
> 		if (va != NULL)
> -			size = keg->uk_size;
> +			size = zone->uz_size;
> 		malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
> 	} else {
> 		size = roundup(size, PAGE_SIZE);
> 		zone = NULL;
> -		keg = NULL;
> 		va = uma_large_malloc(size, flags);
> 		malloc_type_allocated(mtp, va == NULL ? 0 : size);
> 	}
>
> Modified: head/sys/vm/uma.h
> ==============================================================================
> --- head/sys/vm/uma.h	Sun Jan 25 08:27:11 2009	(r187680)
> +++ head/sys/vm/uma.h	Sun Jan 25 09:11:24 2009	(r187681)
> @@ -205,6 +205,17 @@ uma_zone_t uma_zsecond_create(char *name
> 		    uma_init zinit, uma_fini zfini, uma_zone_t master);
>
> /*
> + * Add a second master to a secondary zone.  This provides multiple data
> + * backends for objects with the same size.  Both masters must have
> + * compatible allocation flags.  Presently, UMA_ZONE_MALLOC type zones are
> + * the only supported.
> + *
> + * Returns:
> + * 	Error on failure, 0 on success.
> + */
> +int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
> +
> +/*
>  * Definitions for uma_zcreate flags
>  *
>  * These flags share space with UMA_ZFLAGs in uma_int.h.  Be careful not to
> @@ -230,6 +241,22 @@ uma_zone_t uma_zsecond_create(char *name
> #define	UMA_ZONE_SECONDARY	0x0200	/* Zone is a Secondary Zone */
> #define	UMA_ZONE_REFCNT		0x0400	/* Allocate refcnts in slabs */
> #define	UMA_ZONE_MAXBUCKET	0x0800	/* Use largest buckets */
> +#define	UMA_ZONE_CACHESPREAD	0x1000	/*
> +					 * Spread memory start locations across
> +					 * all possible cache lines.  May
> +					 * require many virtually contiguous
> +					 * backend pages and can fail early.
> +					 */
> +#define	UMA_ZONE_VTOSLAB	0x2000	/* Zone uses vtoslab for lookup. */
> +
> +/*
> + * These flags are shared between the keg and zone.  In zones wishing to add
> + * new kegs these flags must be compatible.  Some are determined based on
> + * physical parameters of the request and may not be provided by the consumer.
> + */
> +#define	UMA_ZONE_INHERIT						\
> +    (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_HASH |		\
> +    UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
>
> /* Definitions for align */
> #define UMA_ALIGN_PTR	(sizeof(void *) - 1)	/* Alignment fit for ptr */
>
> Modified: head/sys/vm/uma_core.c
> ==============================================================================
> --- head/sys/vm/uma_core.c	Sun Jan 25 08:27:11 2009	(r187680)
> +++ head/sys/vm/uma_core.c	Sun Jan 25 09:11:24 2009	(r187681)
> @@ -1,5 +1,5 @@
> /*-
> - * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff at FreeBSD.org>
> + * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff at FreeBSD.org>
>  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic at FreeBSD.org>
>  * Copyright (c) 2004-2006 Robert N. M. Watson
>  * All rights reserved.
> @@ -112,7 +112,7 @@ static uma_zone_t slabrefzone;	/* With r
> static uma_zone_t hashzone;
>
> /* The boot-time adjusted value for cache line alignment. */
> -static int uma_align_cache = 16 - 1;
> +static int uma_align_cache = 64 - 1;
>
> static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
>
> @@ -212,7 +212,7 @@ static void *obj_alloc(uma_zone_t, int,
> static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
> static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
> static void page_free(void *, int, u_int8_t);
> -static uma_slab_t slab_zalloc(uma_zone_t, int);
> +static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
> static void cache_drain(uma_zone_t);
> static void bucket_drain(uma_zone_t, uma_bucket_t);
> static void bucket_cache_drain(uma_zone_t zone);
> @@ -221,8 +221,8 @@ static void keg_dtor(void *, int, void *
> static int zone_ctor(void *, int, void *, int);
> static void zone_dtor(void *, int, void *);
> static int zero_init(void *, int, int);
> -static void zone_small_init(uma_zone_t zone);
> -static void zone_large_init(uma_zone_t zone);
> +static void keg_small_init(uma_keg_t keg);
> +static void keg_large_init(uma_keg_t keg);
> static void zone_foreach(void (*zfunc)(uma_zone_t));
> static void zone_timeout(uma_zone_t zone);
> static int hash_alloc(struct uma_hash *);
> @@ -230,19 +230,22 @@ static int hash_expand(struct uma_hash *
> static void hash_free(struct uma_hash *hash);
> static void uma_timeout(void *);
> static void uma_startup3(void);
> -static void *uma_zalloc_internal(uma_zone_t, void *, int);
> -static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
> +static void *zone_alloc_item(uma_zone_t, void *, int);
> +static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip,
>     int);
> static void bucket_enable(void);
> static void bucket_init(void);
> static uma_bucket_t bucket_alloc(int, int);
> static void bucket_free(uma_bucket_t);
> static void bucket_zone_drain(void);
> -static int uma_zalloc_bucket(uma_zone_t zone, int flags);
> -static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
> -static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
> -static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
> +static int zone_alloc_bucket(uma_zone_t zone, int flags);
> +static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
> +static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
> +static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
> +static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
>     uma_fini fini, int align, u_int32_t flags);
> +static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
> +static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
>
> void uma_print_zone(uma_zone_t);
> void uma_print_stats(void);
> @@ -291,7 +294,8 @@ bucket_init(void)
> 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
> 		size += sizeof(void *) * ubz->ubz_entries;
> 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
> -		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
> +		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
> +		    UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET);
> 		for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
> 			bucket_size[i >> BUCKET_SHIFT] = j;
> 	}
> @@ -326,7 +330,7 @@ bucket_alloc(int entries, int bflags)
> 		return (NULL);
>
> 	ubz = bucket_zone_lookup(entries);
> -	bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
> +	bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
> 	if (bucket) {
> #ifdef INVARIANTS
> 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
> @@ -344,7 +348,7 @@ bucket_free(uma_bucket_t bucket)
> 	struct uma_bucket_zone *ubz;
>
> 	ubz = bucket_zone_lookup(bucket->ub_entries);
> -	uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
> +	zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
> 	    ZFREE_STATFREE);
> }
>
> @@ -357,6 +361,21 @@ bucket_zone_drain(void)
> 		zone_drain(ubz->ubz_zone);
> }
>
> +static inline uma_keg_t
> +zone_first_keg(uma_zone_t zone)
> +{
> +
> +	return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
> +}
> +
> +static void
> +zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
> +{
> +	uma_klink_t klink;
> +
> +	LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
> +		kegfn(klink->kl_keg);
> +}
>
> /*
>  * Routine called by timeout which is used to fire off some time interval
> @@ -382,29 +401,20 @@ uma_timeout(void *unused)
>  * Routine to perform timeout driven calculations.  This expands the
>  * hashes and does per cpu statistics aggregation.
>  *
> - *  Arguments:
> - *	zone  The zone to operate on
> - *
> - *  Returns:
> - *	Nothing
> + *  Returns nothing.
>  */
> static void
> -zone_timeout(uma_zone_t zone)
> +keg_timeout(uma_keg_t keg)
> {
> -	uma_keg_t keg;
> -	u_int64_t alloc;
> -
> -	keg = zone->uz_keg;
> -	alloc = 0;
>
> +	KEG_LOCK(keg);
> 	/*
> -	 * Expand the zone hash table.
> +	 * Expand the keg hash table.
> 	 *
> 	 * This is done if the number of slabs is larger than the hash size.
> 	 * What I'm trying to do here is completely reduce collisions.  This
> 	 * may be a little aggressive.  Should I allow for two collisions max?
> 	 */
> -	ZONE_LOCK(zone);
> 	if (keg->uk_flags & UMA_ZONE_HASH &&
> 	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
> 		struct uma_hash newhash;
> @@ -413,14 +423,14 @@ zone_timeout(uma_zone_t zone)
>
> 		/*
> 		 * This is so involved because allocating and freeing
> -		 * while the zone lock is held will lead to deadlock.
> +		 * while the keg lock is held will lead to deadlock.
> 		 * I have to do everything in stages and check for
> 		 * races.
> 		 */
> 		newhash = keg->uk_hash;
> -		ZONE_UNLOCK(zone);
> +		KEG_UNLOCK(keg);
> 		ret = hash_alloc(&newhash);
> -		ZONE_LOCK(zone);
> +		KEG_LOCK(keg);
> 		if (ret) {
> 			if (hash_expand(&keg->uk_hash, &newhash)) {
> 				oldhash = keg->uk_hash;
> @@ -428,12 +438,19 @@ zone_timeout(uma_zone_t zone)
> 			} else
> 				oldhash = newhash;
>
> -			ZONE_UNLOCK(zone);
> +			KEG_UNLOCK(keg);
> 			hash_free(&oldhash);
> -			ZONE_LOCK(zone);
> +			KEG_LOCK(keg);
> 		}
> 	}
> -	ZONE_UNLOCK(zone);
> +	KEG_UNLOCK(keg);
> +}
> +
> +static void
> +zone_timeout(uma_zone_t zone)
> +{
> +
> +	zone_foreach_keg(zone, &keg_timeout);
> }
>
> /*
> @@ -462,7 +479,7 @@ hash_alloc(struct uma_hash *hash)
> 		    M_UMAHASH, M_NOWAIT);
> 	} else {
> 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
> -		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
> +		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
> 		    M_WAITOK);
> 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
> 	}
> @@ -535,7 +552,7 @@ hash_free(struct uma_hash *hash)
> 	if (hash->uh_slab_hash == NULL)
> 		return;
> 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
> -		uma_zfree_internal(hashzone,
> +		zone_free_item(hashzone,
> 		    hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
> 	else
> 		free(hash->uh_slab_hash, M_UMAHASH);
> @@ -555,20 +572,11 @@ hash_free(struct uma_hash *hash)
> static void
> bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
> {
> -	uma_slab_t slab;
> -	int mzone;
> 	void *item;
>
> 	if (bucket == NULL)
> 		return;
>
> -	slab = NULL;
> -	mzone = 0;
> -
> -	/* We have to lookup the slab again for malloc.. */
> -	if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
> -		mzone = 1;
> -
> 	while (bucket->ub_cnt > 0)  {
> 		bucket->ub_cnt--;
> 		item = bucket->ub_bucket[bucket->ub_cnt];
> @@ -577,15 +585,7 @@ bucket_drain(uma_zone_t zone, uma_bucket
> 		KASSERT(item != NULL,
> 		    ("bucket_drain: botched ptr, item is NULL"));
> #endif
> -		/*
> -		 * This is extremely inefficient.  The slab pointer was passed
> -		 * to uma_zfree_arg, but we lost it because the buckets don't
> -		 * hold them.  This will go away when free() gets a size passed
> -		 * to it.
> -		 */
> -		if (mzone)
> -			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
> -		uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
> +		zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
> 	}
> }
>
> @@ -665,42 +665,32 @@ bucket_cache_drain(uma_zone_t zone)
> }
>
> /*
> - * Frees pages from a zone back to the system.  This is done on demand from
> + * Frees pages from a keg back to the system.  This is done on demand from
>  * the pageout daemon.
>  *
> - * Arguments:
> - *	zone  The zone to free pages from
> - *	 all  Should we drain all items?
> - *
> - * Returns:
> - *	Nothing.
> + * Returns nothing.
>  */
> -void
> -zone_drain(uma_zone_t zone)
> +static void
> +keg_drain(uma_keg_t keg)
> {
> 	struct slabhead freeslabs = { 0 };
> -	uma_keg_t keg;
> 	uma_slab_t slab;
> 	uma_slab_t n;
> 	u_int8_t flags;
> 	u_int8_t *mem;
> 	int i;
>
> -	keg = zone->uz_keg;
> -
> 	/*
> -	 * We don't want to take pages from statically allocated zones at this
> +	 * We don't want to take pages from statically allocated kegs at this
> 	 * time
> 	 */
> 	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
> 		return;
>
> -	ZONE_LOCK(zone);
> -
> #ifdef UMA_DEBUG
> -	printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
> +	printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
> #endif
> -	bucket_cache_drain(zone);
> +	KEG_LOCK(keg);
> 	if (keg->uk_free == 0)
> 		goto finished;
>
> @@ -726,7 +716,7 @@ zone_drain(uma_zone_t zone)
> 		slab = n;
> 	}
> finished:
> -	ZONE_UNLOCK(zone);
> +	KEG_UNLOCK(keg);
>
> 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
> 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
> @@ -738,8 +728,7 @@ finished:
> 		flags = slab->us_flags;
> 		mem = slab->us_data;
>
> -		if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
> -		    (keg->uk_flags & UMA_ZONE_REFCNT)) {
> +		if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
> 			vm_object_t obj;
>
> 			if (flags & UMA_SLAB_KMEM)
> @@ -753,21 +742,61 @@ finished:
> 				    obj);
> 		}
> 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
> -			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
> +			zone_free_item(keg->uk_slabzone, slab, NULL,
> 			    SKIP_NONE, ZFREE_STATFREE);
> #ifdef UMA_DEBUG
> 		printf("%s: Returning %d bytes.\n",
> -		    zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
> +		    keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
> #endif
> 		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
> 	}
> }
>
> +static void
> +zone_drain_wait(uma_zone_t zone, int waitok)
> +{
> +
> +	/*
> +	 * Set draining to interlock with zone_dtor() so we can release our
> +	 * locks as we go.  Only dtor() should do a WAITOK call since it
> +	 * is the only call that knows the structure will still be available
> +	 * when it wakes up.
> +	 */
> +	ZONE_LOCK(zone);
> +	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
> +		if (waitok == M_NOWAIT)
> +			goto out;
> +		mtx_unlock(&uma_mtx);
> +		msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
> +		mtx_lock(&uma_mtx);
> +	}
> +	zone->uz_flags |= UMA_ZFLAG_DRAINING;
> +	bucket_cache_drain(zone);
> +	ZONE_UNLOCK(zone);
> +	/*
> +	 * The DRAINING flag protects us from being freed while
> +	 * we're running.  Normally the uma_mtx would protect us but we
> +	 * must be able to release and acquire the right lock for each keg.
> +	 */
> +	zone_foreach_keg(zone, &keg_drain);
> +	ZONE_LOCK(zone);
> +	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
> +	wakeup(zone);
> +out:
> +	ZONE_UNLOCK(zone);
> +}
> +
> +void
> +zone_drain(uma_zone_t zone)
> +{
> +
> +	zone_drain_wait(zone, M_NOWAIT);
> +}
> +
> /*
> - * Allocate a new slab for a zone.  This does not insert the slab onto a list.
> + * Allocate a new slab for a keg.  This does not insert the slab onto a list.
>  *
>  * Arguments:
> - *	zone  The zone to allocate slabs for
>  *	wait  Shall we wait?
>  *
>  * Returns:
> @@ -775,27 +804,28 @@ finished:
>  *	caller specified M_NOWAIT.
>  */
> static uma_slab_t
> -slab_zalloc(uma_zone_t zone, int wait)
> +keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
> {
> 	uma_slabrefcnt_t slabref;
> +	uma_alloc allocf;
> 	uma_slab_t slab;
> -	uma_keg_t keg;
> 	u_int8_t *mem;
> 	u_int8_t flags;
> 	int i;
>
> +	mtx_assert(&keg->uk_lock, MA_OWNED);
> 	slab = NULL;
> -	keg = zone->uz_keg;
>
> #ifdef UMA_DEBUG
> -	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
> +	printf("slab_zalloc:  Allocating a new slab for %s\n", keg->uk_name);
> #endif
> -	ZONE_UNLOCK(zone);
> +	allocf = keg->uk_allocf;
> +	KEG_UNLOCK(keg);
>
> 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
> -		slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
> +		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
> 		if (slab == NULL) {
> -			ZONE_LOCK(zone);
> +			KEG_LOCK(keg);
> 			return NULL;
> 		}
> 	}
> @@ -812,13 +842,13 @@ slab_zalloc(uma_zone_t zone, int wait)
> 	else
> 		wait &= ~M_ZERO;
>
> -	mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
> -	    &flags, wait);
> +	/* zone is passed for legacy reasons. */
> +	mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
> 	if (mem == NULL) {
> 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
> -			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
> +			zone_free_item(keg->uk_slabzone, slab, NULL,
> 			    SKIP_NONE, ZFREE_STATFREE);
> -		ZONE_LOCK(zone);
> +		KEG_LOCK(keg);
> 		return (NULL);
> 	}
>
> @@ -826,8 +856,7 @@ slab_zalloc(uma_zone_t zone, int wait)
> 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
> 		slab = (uma_slab_t )(mem + keg->uk_pgoff);
>
> -	if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
> -	    (keg->uk_flags & UMA_ZONE_REFCNT))
> +	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
> 		for (i = 0; i < keg->uk_ppera; i++)
> 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
>
> @@ -860,8 +889,7 @@ slab_zalloc(uma_zone_t zone, int wait)
> 					    (keg->uk_rsize * i),
> 					    keg->uk_size);
> 			}
> -			if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
> -			    (keg->uk_flags & UMA_ZONE_REFCNT)) {
> +			if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
> 				vm_object_t obj;
>
> 				if (flags & UMA_SLAB_KMEM)
> @@ -875,15 +903,15 @@ slab_zalloc(uma_zone_t zone, int wait)
> 					    (i * PAGE_SIZE), obj);
> 			}
> 			if (keg->uk_flags & UMA_ZONE_OFFPAGE)
> -				uma_zfree_internal(keg->uk_slabzone, slab,
> +				zone_free_item(keg->uk_slabzone, slab,
> 				    NULL, SKIP_NONE, ZFREE_STATFREE);
> 			keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
> 			    flags);
> -			ZONE_LOCK(zone);
> +			KEG_LOCK(keg);
> 			return (NULL);
> 		}
> 	}
> -	ZONE_LOCK(zone);
> +	KEG_LOCK(keg);
>
> 	if (keg->uk_flags & UMA_ZONE_HASH)
> 		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
> @@ -905,7 +933,7 @@ startup_alloc(uma_zone_t zone, int bytes
> 	uma_keg_t keg;
> 	uma_slab_t tmps;
>
> -	keg = zone->uz_keg;
> +	keg = zone_first_keg(zone);
>
> 	/*
> 	 * Check our small startup cache to see if it has pages remaining.
> @@ -935,7 +963,6 @@ startup_alloc(uma_zone_t zone, int bytes
>  * Allocates a number of pages from the system
>  *
>  * Arguments:
> - *	zone  Unused
>  *	bytes  The number of bytes requested
>  *	wait  Shall we wait?
>  *
> @@ -958,7 +985,6 @@ page_alloc(uma_zone_t zone, int bytes, u
>  * Allocates a number of pages from within an object
>  *
>  * Arguments:
> - *	zone   Unused
>  *	bytes  The number of bytes requested
>  *	wait   Shall we wait?
>  *
> @@ -973,8 +999,10 @@ obj_alloc(uma_zone_t zone, int bytes, u_
> 	vm_offset_t retkva, zkva;
> 	vm_page_t p;
> 	int pages, startpages;
> +	uma_keg_t keg;
>
> -	object = zone->uz_keg->uk_obj;
> +	keg = zone_first_keg(zone);
> +	object = keg->uk_obj;
> 	retkva = 0;
>
> 	/*
> @@ -984,7 +1012,7 @@ obj_alloc(uma_zone_t zone, int bytes, u_
> 	p = TAILQ_LAST(&object->memq, pglist);
> 	pages = p != NULL ? p->pindex + 1 : 0;
> 	startpages = pages;
> -	zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
> +	zkva = keg->uk_kva + pages * PAGE_SIZE;
> 	for (; bytes > 0; bytes -= PAGE_SIZE) {
> 		p = vm_page_alloc(object, pages,
> 		    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
> @@ -1052,25 +1080,23 @@ zero_init(void *mem, int size, int flags
> }
>
> /*
> - * Finish creating a small uma zone.  This calculates ipers, and the zone size.
> + * Finish creating a small uma keg.  This calculates ipers, and the keg size.
>  *
>  * Arguments
> - *	zone  The zone we should initialize
> + *	keg  The zone we should initialize
>  *
>  * Returns
>  *	Nothing
>  */
> static void
> -zone_small_init(uma_zone_t zone)
> +keg_small_init(uma_keg_t keg)
> {
> -	uma_keg_t keg;
> 	u_int rsize;
> 	u_int memused;
> 	u_int wastedspace;
> 	u_int shsize;
>
> -	keg = zone->uz_keg;
> -	KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
> +	KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
> 	rsize = keg->uk_size;
>
> 	if (rsize < UMA_SMALLEST_UNIT)
> @@ -1090,7 +1116,7 @@ zone_small_init(uma_zone_t zone)
> 	}
>
> 	keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
> -	KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
> +	KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
> 	memused = keg->uk_ipers * rsize + shsize;
> 	wastedspace = UMA_SLAB_SIZE - memused;
>
> @@ -1109,44 +1135,41 @@ zone_small_init(uma_zone_t zone)
> 	    (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
> 		keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
> 		KASSERT(keg->uk_ipers <= 255,
> -		    ("zone_small_init: keg->uk_ipers too high!"));
> +		    ("keg_small_init: keg->uk_ipers too high!"));
> #ifdef UMA_DEBUG
> 		printf("UMA decided we need offpage slab headers for "
> -		    "zone: %s, calculated wastedspace = %d, "
> +		    "keg: %s, calculated wastedspace = %d, "
> 		    "maximum wasted space allowed = %d, "
> 		    "calculated ipers = %d, "
> -		    "new wasted space = %d\n", zone->uz_name, wastedspace,
> +		    "new wasted space = %d\n", keg->uk_name, wastedspace,
> 		    UMA_MAX_WASTE, keg->uk_ipers,
> 		    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
> #endif
> 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
> -		if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
> +		if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
> 			keg->uk_flags |= UMA_ZONE_HASH;
> 	}
> }
>
> /*
> - * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
> + * Finish creating a large (> UMA_SLAB_SIZE) uma kegs.  Just give in and do
>  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
>  * more complicated.
>  *
>  * Arguments
> - *	zone  The zone we should initialize
> + *	keg  The keg we should initialize
>  *
>  * Returns
>  *	Nothing
>  */
> static void
> -zone_large_init(uma_zone_t zone)
> +keg_large_init(uma_keg_t keg)
> {
> -	uma_keg_t keg;
> 	int pages;
>
> -	keg = zone->uz_keg;
> -
> -	KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
> +	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
> 	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
> -	    ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
> +	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
>
> 	pages = keg->uk_size / UMA_SLAB_SIZE;
>
> @@ -1158,12 +1181,44 @@ zone_large_init(uma_zone_t zone)
> 	keg->uk_ipers = 1;
>
> 	keg->uk_flags |= UMA_ZONE_OFFPAGE;
> -	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
> +	if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
> 		keg->uk_flags |= UMA_ZONE_HASH;
>
> 	keg->uk_rsize = keg->uk_size;
> }
>
> +static void
> +keg_cachespread_init(uma_keg_t keg)
> +{
> +	int alignsize;
> +	int trailer;
> +	int pages;
> +	int rsize;
> +
> +	alignsize = keg->uk_align + 1;
> +	rsize = keg->uk_size;
> +	/*
> +	 * We want one item to start on every align boundary in a page.  To
> +	 * do this we will span pages.  We will also extend the item by the
> +	 * size of align if it is an even multiple of align.  Otherwise, it
> +	 * would fall on the same boundary every time.
> +	 */
> +	if (rsize & keg->uk_align)
> +		rsize = (rsize & ~keg->uk_align) + alignsize;
> +	if ((rsize & alignsize) == 0)
> +		rsize += alignsize;
> +	trailer = rsize - keg->uk_size;
> +	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
> +	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
> +	keg->uk_rsize = rsize;
> +	keg->uk_ppera = pages;
> +	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
> +	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
> +	KASSERT(keg->uk_ipers <= uma_max_ipers,
> +	    ("keg_small_init: keg->uk_ipers too high(%d) increase max_ipers",
> +	    keg->uk_ipers));
> +}
> +
> /*
>  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
>  * the keg onto the global keg list.
> @@ -1195,7 +1250,7 @@ keg_ctor(void *mem, int size, void *udat
> 	 * The master zone is passed to us at keg-creation time.
> 	 */
> 	zone = arg->zone;
> -	zone->uz_keg = keg;
> +	keg->uk_name = zone->uz_name;
>
> 	if (arg->flags & UMA_ZONE_VM)
> 		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
> @@ -1203,24 +1258,31 @@ keg_ctor(void *mem, int size, void *udat
> 	if (arg->flags & UMA_ZONE_ZINIT)
> 		keg->uk_init = zero_init;
>
> +	if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
> +		keg->uk_flags |= UMA_ZONE_VTOSLAB;
> +
> 	/*
> 	 * The +UMA_FRITM_SZ added to uk_size is to account for the
> -	 * linkage that is added to the size in zone_small_init().  If
> +	 * linkage that is added to the size in keg_small_init().  If
> 	 * we don't account for this here then we may end up in
> -	 * zone_small_init() with a calculated 'ipers' of 0.
> +	 * keg_small_init() with a calculated 'ipers' of 0.
> 	 */
> 	if (keg->uk_flags & UMA_ZONE_REFCNT) {
> -		if ((keg->uk_size+UMA_FRITMREF_SZ) >
> +		if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
> +			keg_cachespread_init(keg);
> +		else if ((keg->uk_size+UMA_FRITMREF_SZ) >
> 		    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
> -			zone_large_init(zone);
> +			keg_large_init(keg);
> 		else
> -			zone_small_init(zone);
> +			keg_small_init(keg);
> 	} else {
> -		if ((keg->uk_size+UMA_FRITM_SZ) >
> +		if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
> +			keg_cachespread_init(keg);
> +		else if ((keg->uk_size+UMA_FRITM_SZ) >
> 		    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
> -			zone_large_init(zone);
> +			keg_large_init(keg);
> 		else
> -			zone_small_init(zone);
> +			keg_small_init(keg);
> 	}
>
> 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
> @@ -1244,14 +1306,12 @@ keg_ctor(void *mem, int size, void *udat
> 	}
>
> 	/*
> -	 * Initialize keg's lock (shared among zones) through
> -	 * Master zone
> +	 * Initialize keg's lock (shared among zones).
> 	 */
> -	zone->uz_lock = &keg->uk_lock;
> 	if (arg->flags & UMA_ZONE_MTXCLASS)
> -		ZONE_LOCK_INIT(zone, 1);
> +		KEG_LOCK_INIT(keg, 1);
> 	else
> -		ZONE_LOCK_INIT(zone, 0);
> +		KEG_LOCK_INIT(keg, 0);
>
> 	/*
> 	 * If we're putting the slab header in the actual page we need to
> @@ -1300,10 +1360,10 @@ keg_ctor(void *mem, int size, void *udat
> 		hash_alloc(&keg->uk_hash);
>
> #ifdef UMA_DEBUG
> -	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
> -	    zone->uz_name, zone,
> -	    keg->uk_size, keg->uk_ipers,
> -	    keg->uk_ppera, keg->uk_pgoff);
> +	printf("UMA: %s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
> +	    zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
> +	    keg->uk_ipers, keg->uk_ppera,
> +	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
> #endif
>
> 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
> @@ -1320,7 +1380,6 @@ keg_ctor(void *mem, int size, void *udat
>  * Arguments/Returns follow uma_ctor specifications
>  *	udata  Actually uma_zctor_args
>  */
> -
> static int
> zone_ctor(void *mem, int size, void *udata, int flags)
> {
> @@ -1333,23 +1392,24 @@ zone_ctor(void *mem, int size, void *uda
> 	zone->uz_name = arg->name;
> 	zone->uz_ctor = arg->ctor;
> 	zone->uz_dtor = arg->dtor;
> +	zone->uz_slab = zone_fetch_slab;
> 	zone->uz_init = NULL;
> 	zone->uz_fini = NULL;
> 	zone->uz_allocs = 0;
> 	zone->uz_frees = 0;
> 	zone->uz_fails = 0;
> 	zone->uz_fills = zone->uz_count = 0;
> +	zone->uz_flags = 0;
> +	keg = arg->keg;
>
> 	if (arg->flags & UMA_ZONE_SECONDARY) {
> 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
> -		keg = arg->keg;
> -		zone->uz_keg = keg;
> 		zone->uz_init = arg->uminit;
> 		zone->uz_fini = arg->fini;
> 		zone->uz_lock = &keg->uk_lock;
> +		zone->uz_flags |= UMA_ZONE_SECONDARY;
> 		mtx_lock(&uma_mtx);
> 		ZONE_LOCK(zone);
> -		keg->uk_flags |= UMA_ZONE_SECONDARY;
> 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
> 			if (LIST_NEXT(z, uz_link) == NULL) {
> 				LIST_INSERT_AFTER(z, zone, uz_link);
> @@ -1358,9 +1418,9 @@ zone_ctor(void *mem, int size, void *uda
> 		}
> 		ZONE_UNLOCK(zone);
> 		mtx_unlock(&uma_mtx);
> -	} else if (arg->keg == NULL) {
> -		if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
> -		    arg->align, arg->flags) == NULL)
> +	} else if (keg == NULL) {
> +		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
> +		    arg->align, arg->flags)) == NULL)
> 			return (ENOMEM);
> 	} else {
> 		struct uma_kctor_args karg;
> @@ -1378,15 +1438,22 @@ zone_ctor(void *mem, int size, void *uda
> 		if (error)
> 			return (error);
> 	}
> -	keg = zone->uz_keg;
> +	/*
> +	 * Link in the first keg.
> +	 */
> +	zone->uz_klink.kl_keg = keg;
> +	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
> 	zone->uz_lock = &keg->uk_lock;
> +	zone->uz_size = keg->uk_size;
> +	zone->uz_flags |= (keg->uk_flags &
> +	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
>
> 	/*
> 	 * Some internal zones don't have room allocated for the per cpu
> 	 * caches.  If we're internal, bail out here.
> 	 */
> 	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
> -		KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
> +		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
> 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
> 		return (0);
> 	}
> @@ -1413,18 +1480,17 @@ keg_dtor(void *arg, int size, void *udat
> 	uma_keg_t keg;
>
> 	keg = (uma_keg_t)arg;
> -	mtx_lock(&keg->uk_lock);
> +	KEG_LOCK(keg);
> 	if (keg->uk_free != 0) {
> 		printf("Freed UMA keg was not empty (%d items). "
> 		    " Lost %d pages of memory.\n",
> 		    keg->uk_free, keg->uk_pages);
> 	}
> -	mtx_unlock(&keg->uk_lock);
> +	KEG_UNLOCK(keg);
>
> -	if (keg->uk_flags & UMA_ZONE_HASH)
> -		hash_free(&keg->uk_hash);
> +	hash_free(&keg->uk_hash);
>
> -	mtx_destroy(&keg->uk_lock);
> +	KEG_LOCK_FINI(keg);
> }
>
> /*
> @@ -1436,38 +1502,46 @@ keg_dtor(void *arg, int size, void *udat
> static void
> zone_dtor(void *arg, int size, void *udata)
> {
> +	uma_klink_t klink;
> 	uma_zone_t zone;
> 	uma_keg_t keg;
>
> 	zone = (uma_zone_t)arg;
> -	keg = zone->uz_keg;
> +	keg = zone_first_keg(zone);
>
> -	if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
> +	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
> 		cache_drain(zone);
>
> 	mtx_lock(&uma_mtx);
> -	zone_drain(zone);
> -	if (keg->uk_flags & UMA_ZONE_SECONDARY) {
> -		LIST_REMOVE(zone, uz_link);
> -		/*
> -		 * XXX there are some races here where
> -		 * the zone can be drained but zone lock
> -		 * released and then refilled before we
> -		 * remove it... we dont care for now
> -		 */
> -		ZONE_LOCK(zone);
> -		if (LIST_EMPTY(&keg->uk_zones))
> -			keg->uk_flags &= ~UMA_ZONE_SECONDARY;
> -		ZONE_UNLOCK(zone);
> -		mtx_unlock(&uma_mtx);
> -	} else {
> +	LIST_REMOVE(zone, uz_link);
> +	mtx_unlock(&uma_mtx);
> +	/*
> +	 * XXX there are some races here where
> +	 * the zone can be drained but zone lock
> +	 * released and then refilled before we
> +	 * remove it... we dont care for now
> +	 */
> +	zone_drain_wait(zone, M_WAITOK);
> +	/*
> +	 * Unlink all of our kegs.
> +	 */
> +	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
> +		klink->kl_keg = NULL;
> +		LIST_REMOVE(klink, kl_link);
> +		if (klink == &zone->uz_klink)
> +			continue;
> +		free(klink, M_TEMP);
> +	}
> +	/*
> +	 * We only destroy kegs from non secondary zones.
> +	 */
> +	if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
> +		mtx_lock(&uma_mtx);
> 		LIST_REMOVE(keg, uk_link);
> -		LIST_REMOVE(zone, uz_link);
> 		mtx_unlock(&uma_mtx);
> -		uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
> +		zone_free_item(kegs, keg, NULL, SKIP_NONE,
> 		    ZFREE_STATFREE);
> 	}
> -	zone->uz_keg = NULL;
> }
>
> /*
> @@ -1517,7 +1591,7 @@ uma_startup(void *bootmem, int boot_page
> 	 * (UMA_MAX_WASTE).
> 	 *
> 	 * We iterate until we find an object size for
> -	 * which the calculated wastage in zone_small_init() will be
> +	 * which the calculated wastage in keg_small_init() will be
> 	 * enough to warrant OFFPAGE.  Since wastedspace versus objsize
> 	 * is an overall increasing see-saw function, we find the smallest
> 	 * objsize such that the wastage is always acceptable for objects
> @@ -1525,7 +1599,7 @@ uma_startup(void *bootmem, int boot_page
> 	 * generates a larger possible uma_max_ipers, we use this computed
> 	 * objsize to calculate the largest ipers possible.  Since the
> 	 * ipers calculated for OFFPAGE slab headers is always larger than
> -	 * the ipers initially calculated in zone_small_init(), we use
> +	 * the ipers initially calculated in keg_small_init(), we use
> 	 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
> 	 * obtain the maximum ipers possible for offpage slab headers.
> 	 *
> @@ -1557,7 +1631,7 @@ uma_startup(void *bootmem, int boot_page
> 	}
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
>

