[REVIEW] unit number allocation API

Poul-Henning Kamp phk at FreeBSD.org
Thu Jul 22 08:02:34 PDT 2004



We need to allocate unit numbers for (pseudo)devices, and a few
places we need to allocate inode numbers for synthetic filesystems
(for instance DEVFS).

For these applications the overhead of rman(9) can be totally
unacceptable (60 bytes per allocation ?) and something more memory
frugal is called for.

This is a small API I just wrote, targeted specifically for allocating
unit numbers and similar spaces.

Currently the allocation policy is "lowest free number", but it
would be possible to add support for allocating a specific number
as well.

It uses a mixed run-length/bitmap strategy with fixed size memory
chunks (so it can use uma(9) in the kernel).

Worst case memory usage is two bits per managed unit-number (worst
case is "allocate all units, free all the odd numbered ones").

For the typical case where we never free any unit numbers, it will
use 52 bytes in total on i386.

Please review.  (It can be run in userland)

Poul-Henning



/*-
 * Copyright (c) 2004 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 * Unit number allocation functions.
 *
 * These functions implement a mixed run-length/bitmap management of unit
 * number spaces.
 *
 * Allocation is always lowest free number first.
 *
 * Worst case memory usage (disregarding boundary effects in the low end)
 * is two bits for each slot in the unit number space.
 *
 * The typical case where no unit numbers are freed is managed in a
 * constant sized memory footprint of:
 *	 sizeof(struct unithdr) + 2 * sizeof (struct unit)
 * (== 52 bytes on i386)
 *
 * A small userland driver program is included.
 *
 */

#ifndef _KERNEL
#include <stdio.h>
#include <stdlib.h>
#endif

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/bitstring.h>

#ifndef _KERNEL

#define KASSERT(cond, arg) \
	do { \
		if (!(cond)) { \
			printf arg; \
			exit (1); \
		} \
	} while (0)

#endif

/*
 * This is our basic building block.
 *
 * It can be used in three different ways depending on the value of the ptr
 * element.  If ptr is NULL, it represents a run of free units.  If ptr points
 * to the head, it represents a run of busy units.  Otherwise it points to
 * an bitstring where a set bit means busy.
 *
 * When a run is represented, the len field is the length of the run.
 * When a bitmap is represented the len field represents the number of busy
 * units in the bitmap.
 *
 * The bitmap is the same size as struct unit to optimize memory management.
 */
struct unit {
	TAILQ_ENTRY(unit)	list;
	u_int			len;
	void			*ptr;
};

/* Number of bits in the bitmap */
#define NBITS	(sizeof(struct unit) * 8)

/* Header element for a unit number space. */

struct unithdr {
	TAILQ_HEAD(unithd,unit)	head;
	u_int			low;
	u_int			high;
	u_int			busy;
};

#ifndef _KERNEL

/*
 * Userland memory management.  Just use calloc and keep track of how
 * many elements we have allocated for check_unithdr().
 */

static u_int	nalloc;

static void *
new_unit(void)
{
	nalloc++;
	return (calloc(1, sizeof(struct unit)));
}

static void
delete_unit(void *ptr)
{
	nalloc--;
	free(ptr);
}

#endif

#if defined(DIAGNOSTIC) || !defined(_KERNEL)
/*
 * Consistency check
 */
static void
check_unithdr(struct unithdr *uh)
{
	struct unit *up;
	u_int x, y, z, w;

	y = 0;
	z = 0;
	TAILQ_FOREACH(up, &uh->head, list) {
		if (up->ptr != uh && up->ptr != NULL) {
			w = 0;
			for (x = 0; x < NBITS; x++)
				if (bit_test((bitstr_t *)up->ptr, x))
					w++;
			KASSERT (w == up->len,
				    ("up->len = %d should be %d\n",
				    up->len, w));
		}
		if (up->ptr != NULL)
			y += up->len;
		z++;
		if (up->ptr != NULL && up->ptr != uh)
			z++;
	}
	KASSERT (y == uh->busy,
		("Wrong result, busy %u y %u\n", uh->busy, y));
	KASSERT (z == nalloc,
		("found %u alloc %u\n", z, nalloc));
}

#define CHECK_UNITHDR(uh)	check_unithdr(uh)

#else

#define CHECK_UNITHDR(uh)

#endif

/*
 * Allocate a new unit management set.
 *
 * Highest and lowest valid values given as paramters.
 */

struct unithdr *
new_unithdr(u_int low, u_int high)
{
	struct unithdr *uh;
	struct unit *up;

	KASSERT(low <= high,
	    ("new_unithdr(%u, %u) makes no sense", low, high));
	uh = calloc(1, sizeof *uh);
	TAILQ_INIT(&uh->head);
	uh->low = low;
	uh->high = high;
	up = new_unit();
	up->len = 1 + high - low;
	up->ptr = NULL;
	TAILQ_INSERT_HEAD(&uh->head, up, list);
	return (uh);
}

/*
 * See if a given unit should be collapsed with a neighbor
 */
static void
collapse_unit(struct unithdr *uh, struct unit *up)
{
	struct unit *upp;

	upp = TAILQ_PREV(up, unithd, list);
	if (upp != NULL && up->ptr == upp->ptr) {
		up->len += upp->len;
		TAILQ_REMOVE(&uh->head, upp, list);
		delete_unit(upp);
	}
	upp = TAILQ_NEXT(up, list);
	if (upp != NULL && up->ptr == upp->ptr) {
		up->len += upp->len;
		TAILQ_REMOVE(&uh->head, upp, list);
		delete_unit(upp);
	}
}

/*
 * Eliminate a zero length unit
 */
static void
elim_unit(struct unithdr *uh, struct unit *up)
{
	struct unit *upp;

	KASSERT(up->len == 0,
	    ("elim_unit on %u length unit", up->len));
	upp = TAILQ_PREV(up, unithd, list);
	if (upp == NULL)
		upp = TAILQ_NEXT(up, list);
	TAILQ_REMOVE(&uh->head, up, list);
	delete_unit(up);
	if (upp != NULL)
		collapse_unit(uh, upp);
}

/*
 * Allocate a free unit.
 */
u_int
alloc_unit(struct unithdr *uh)
{
	struct unit *up, *upp;
	u_int x;
	int y;

	CHECK_UNITHDR(uh);
	x = uh->low;
	TAILQ_FOREACH(up, &uh->head, list) {
		if (up->ptr == uh) {
			/* Skip busy entries */
			x += up->len;
			continue;
		}
		if (up->ptr != NULL) {
			/* Bitmap unit */
			bit_ffc((bitstr_t *)up->ptr, NBITS, &y);
			KASSERT(y != -1,
			    ("No clear bit in bitmap page"));
			bit_set((bitstr_t *)up->ptr, y);
			up->len++;
			if (up->len == NBITS) {
				/* The unit is all busy, drop bitmap */
				delete_unit(up->ptr);
				up->ptr = uh;
				collapse_unit(uh, up);
			}
			uh->busy++;
			CHECK_UNITHDR(uh);
			return (x + y);
		}
		if (up->len == 1) {
			/* Single free unit, grab it */
			up->ptr = uh;
			collapse_unit(uh, up);
			uh->busy++;
			CHECK_UNITHDR(uh);
			return (x);
		}
		/* Slice off first free unit */
		upp = TAILQ_PREV(up, unithd, list);
		if (upp == NULL || upp->ptr != uh) {
			upp = new_unit();
			upp->len = 0;
			upp->ptr = uh;
			TAILQ_INSERT_BEFORE(up, upp, list);
		}
		upp->len++;
		up->len--;
		uh->busy++;
		CHECK_UNITHDR(uh);
		return (x);
	}
	KASSERT(0 == 1,
	    ("Unit table full"));
}

/*
 * Free a unit.
 *
 * If we can save units by using a bitmap, do so.
 */
void
free_unit(struct unithdr *uh, u_int unit)
{
	struct unit *up, *upp, *upn, *ul;
	u_int x, l, xl, n, pl;

	KASSERT(unit >= uh->low && unit <= uh->high,
	    ("free_unit(%u) out of range [%u...%u]", unit, uh->low, uh->high));
	CHECK_UNITHDR(uh);
	unit -= uh->low;
	x = 0;
	l = unit - unit % NBITS;
	ul = 0;
	TAILQ_FOREACH(up, &uh->head, list) {
		/* Keep track of which unit we'll split if we do */
		if (x <= l) {
			ul = up;
			xl = x;
		}
		if (up->ptr != NULL && up->ptr != uh) { /* Bitmap */
			if (x + NBITS <= unit) {
				/* Skip bitmap */
				x += NBITS;
				continue;
			}
			KASSERT(bit_test((bitstr_t *)up->ptr, unit - x) != 0,
			    ("Freeing free unit %d %d (bitmap)\n", unit, unit - x));
			bit_clear((bitstr_t *)up->ptr, unit - x);
			uh->busy--;
			up->len--;
			/*
			 * XXX: up->len == 1 can possibly be collapsed to
			 * XXX: neighboring runs.
			 */
			if (up->len > 0)
				return;
			/* We have freed all units in bitmap, drop it */
			delete_unit(up->ptr);
			up->ptr = NULL;
			up->len = NBITS;
			collapse_unit(uh, up);
			CHECK_UNITHDR(uh);
			return;
		}
		if (x + up->len <= unit) { /* Run length unit */
			/* Skip run length */
			x += up->len;
			continue;
		}

		/* We now have our run length unit */
		KASSERT(up->ptr == uh,
		    ("Freeing free unit %d (range)\n", unit));

		/* Check if we can shift a unit to the previous one */
		upp = TAILQ_PREV(up, unithd, list);
		if (unit == x && upp != NULL && upp->ptr == NULL) {
			upp->len++;
			up->len--;
			uh->busy--;
			if (up->len == 0)
				elim_unit(uh, up);
			CHECK_UNITHDR(uh);
			return;
		}

		/* Check if we can shift a unit to the next one */
		upn = TAILQ_NEXT(up, list);
		if (unit == x + up->len - 1 &&
		    upn != NULL && upn->ptr == NULL) {
			upn->len++;
			up->len--;
			uh->busy--;
			if (up->len == 0)
				elim_unit(uh, up);
			CHECK_UNITHDR(uh);
			return;
		}

		/*
		 * Split the current unit in two or three
		 *
		 * We do this backwards to front to keel ul valid.
		 */
		pl = up->len - (1 + (unit - x));
		if (pl > 0) {
			/* The busy tail end */
			upp = new_unit();
			upp->ptr = uh;
			upp->len = pl;
			TAILQ_INSERT_AFTER(&uh->head, up, upp, list);
		}

		if (unit == x) {
			/* We are done splitting */
			up->len = 1;
			up->ptr = NULL;
		} else {
			/* The freed bit */
			upp = new_unit();
			upp->len = 1;
			upp->ptr = NULL;
			TAILQ_INSERT_AFTER(&uh->head, up, upp, list);
			/* Adjust current unit */
			up->len = unit - x;
		}

		/* Our ul element may have shifted one later */
		if (ul->len + xl <= l) {
			xl += ul->len;
			ul = TAILQ_NEXT(ul, list);
		}

		uh->busy--;
		CHECK_UNITHDR(uh);

		/* Count units entirely inside potential bitmap */
		n = 0;
		pl = xl;
		unit = l + NBITS;
		for (up = ul;
		     up != NULL && pl + up->len <= unit;
		     up = TAILQ_NEXT(up, list)) {
			if (pl >= l)
				n++;
			pl += up->len;
		}

		/* If less than three, a bitmap does not pay off */
		if (n < 3)
			return;

		/* Allocate bitmap */
		upp = new_unit();
		upp->ptr = new_unit();

		/* Tail end of ul element */
		pl = ul->len - (l - xl);
		if (ul->ptr != NULL) {
			bit_nset(upp->ptr, 0, pl - 1);
			upp->len = pl;
		}
		ul->len -= pl;
		/*
		 * We keep around ul even if it became zero sized, we need 
		 * it as a marker for where the bitmap goes in the tailq.
		 */
		l += pl;

		/* Soak up run length units until we have absorbed NBITS */
		while (pl != NBITS) {
			/* Grab first one in line */
			upn = TAILQ_NEXT(ul, list);
			/* We may not have a multiple of NBITS totally */
			if (upn == NULL)
				break;

			/* Run may extend past our new bitmap */
			n = NBITS - pl;
			if (n > upn->len)
				n = upn->len;

			if (upn->ptr != NULL) {
				bit_nset(upp->ptr, pl, pl + n - 1);
				upp->len += n;
			}
			pl += n;

			if (n != upn->len) {
				/* We did not absorbed the entire run */
				upn->len -= n;
				break;
			}
			TAILQ_REMOVE(&uh->head, upn, list);
			delete_unit(upn);
		}
		/* Insert the bitmap after ul */
		TAILQ_INSERT_AFTER(&uh->head, ul, upp, list);

		/* Ditch ul if it got reduced to zero size */
		if (ul->len == 0) {
			TAILQ_REMOVE(&uh->head, ul, list);
			delete_unit(ul);
		}
		CHECK_UNITHDR(uh);
		return;
	}
	KASSERT(0 != 1, ("Fell off the end in free_unit"));
}

#ifndef _KERNEL

/*
 * Simple stochastic test driver for the above functions
 */

static void
print_unit(struct unithdr *uh, struct unit *up)
{
	u_int x;

	printf("  %p len = %5u ptr = %p", up, up->len, up->ptr);
	if (up->ptr == uh || up->ptr == NULL) {
		printf("\n");
		return;
	}
	printf(" [");
	for (x = 0; x < NBITS; x++) {
		if (bit_test((bitstr_t *)up->ptr, x))
			putchar('#');
		else 
			putchar(' ');
	}
	printf("]\n");
}

static void
print_unithdr(struct unithdr *uh)
{
	struct unit *up;
	u_int x;

	printf("%p low = %u high = %u busy %u\n",
	    uh, uh->low, uh->high, uh->busy);
	x = uh->low;
	TAILQ_FOREACH(up, &uh->head, list) {
		printf("  from = %5u", x);
		print_unit(uh, up);
		if (up->ptr == NULL || up->ptr == uh)
			x += up->len;
		else
			x += NBITS;
	}
}

/* Number of units to test */
#define NN	10000

int
main(int argc __unused, const char **argv __unused)
{
	struct unithdr *uh;
	int i, x;
	char a[NN];

	uh = new_unithdr(0, NN - 1);

	memset(a, 0, sizeof a);

	printf("sizeof(struct unit) %d\n", sizeof (struct unit));
	printf("sizeof(struct unithdr) %d\n", sizeof (struct unithdr));
	x = 1;
	for (;;) {
		i = random() % NN;
		if (a[i]) {
			printf("F %u\n", i);
			free_unit(uh, i);
			a[i] = 0;
		} else {
			i = alloc_unit(uh);
			a[i] = 1;
			printf("A %u\n", i);
		}
		if (1)	/* XXX: change this for detailed debug printout */
			print_unithdr(uh);
		check_unithdr(uh);
	}
	return (0);
}
#endif

-- 
Poul-Henning Kamp       | UNIX since Zilog Zeus 3.20
phk at FreeBSD.ORG         | TCP/IP since RFC 956
FreeBSD committer       | BSD since 4.3-tahoe
Never attribute to malice what can adequately be explained by incompetence.


More information about the freebsd-arch mailing list