svn commit: r189389 - in head/lib/libarchive: . test

Wed Mar 4 18:19:43 PST 2009

Author: kientzle
Date: Thu Mar  5 02:19:42 2009
New Revision: 189389
URL: http://svn.freebsd.org/changeset/base/189389

Log:
  Merge r364, r378, r379, r393, and r539 from libarchive.googlecode.com:
  This is the last phase of the "big decompression refactor" that
  puts a lazy reblocking layer between each pair of read filters.
  I've also changed the terminology for this area---the two kinds
  of objects are now called "read filters" and "read filter bidders"---and
  moved ownership of these objects to the archive_read core.
  
  This greatly simplifies implementing new read filters, which
  can now use peek/consume I/O semantics both for bidding (arbitrary
  look-ahead!) and for reading streams (look-ahead simplifies handling
  concatenated streams, for instance).
  
  The first merge here is the overhaul proper; the remainder are small
  fixes to correct errors in the initial implementation.

Modified:
  head/lib/libarchive/archive_read.c
  head/lib/libarchive/archive_read_private.h
  head/lib/libarchive/archive_read_support_compression_bzip2.c
  head/lib/libarchive/archive_read_support_compression_compress.c
  head/lib/libarchive/archive_read_support_compression_gzip.c
  head/lib/libarchive/archive_read_support_compression_program.c
  head/lib/libarchive/test/test_read_position.c

Modified: head/lib/libarchive/archive_read.c
==============================================================================

--- head/lib/libarchive/archive_read.c	Thu Mar  5 01:59:49 2009	(r189388)
+++ head/lib/libarchive/archive_read.c	Thu Mar  5 02:19:42 2009	(r189389)
@@ -75,16 +75,6 @@ archive_read_new(void)
 	a->archive.state = ARCHIVE_STATE_NEW;
 	a->entry = archive_entry_new();
 
-	/* Initialize reblocking logic. */
-	a->buffer_size = 64 * 1024; /* 64k */
-	a->buffer = (char *)malloc(a->buffer_size);
-	a->next = a->buffer;
-	if (a->buffer == NULL) {
-		archive_entry_free(a->entry);
-		free(a);
-		return (NULL);
-	}
-
 	return (&a->archive);
 }
 
@@ -117,28 +107,33 @@ archive_read_open(struct archive *a, voi
 }
 
 static ssize_t
-client_read_proxy(struct archive_read_source *self, const void **buff)
+client_read_proxy(struct archive_read_filter *self, const void **buff)
 {
-	return (self->archive->client.reader)((struct archive *)self->archive,
+	ssize_t r;
+	r = (self->archive->client.reader)(&self->archive->archive,
 	    self->data, buff);
+	self->archive->archive.raw_position += r;
+	return (r);
 }
 
 static int64_t
-client_skip_proxy(struct archive_read_source *self, int64_t request)
+client_skip_proxy(struct archive_read_filter *self, int64_t request)
 {
-	return (self->archive->client.skipper)((struct archive *)self->archive,
+	int64_t r;
+	r = (self->archive->client.skipper)(&self->archive->archive,
 	    self->data, request);
+	self->archive->archive.raw_position += r;
+	return (r);
 }
 
 static int
-client_close_proxy(struct archive_read_source *self)
+client_close_proxy(struct archive_read_filter *self)
 {
 	int r = ARCHIVE_OK;
 
 	if (self->archive->client.closer != NULL)
 		r = (self->archive->client.closer)((struct archive *)self->archive,
 		    self->data);
-	free(self);
 	return (r);
 }
 
@@ -151,6 +146,7 @@ archive_read_open2(struct archive *_a, v
     archive_close_callback *client_closer)
 {
 	struct archive_read *a = (struct archive_read *)_a;
+	struct archive_read_filter *filter;
 	int e;
 
 	__archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
@@ -172,27 +168,21 @@ archive_read_open2(struct archive *_a, v
 	}
 
 	/* Save the client functions and mock up the initial source. */
-	a->client.opener = client_opener; /* Do we need to remember this? */
 	a->client.reader = client_reader;
 	a->client.skipper = client_skipper;
 	a->client.closer = client_closer;
-	a->client.data = client_data;
 
-	{
-		struct archive_read_source *source;
-
-		source = calloc(1, sizeof(*source));
-		if (source == NULL)
-			return (ARCHIVE_FATAL);
-		source->reader = NULL;
-		source->upstream = NULL;
-		source->archive = a;
-		source->data = client_data;
-		source->read = client_read_proxy;
-		source->skip = client_skip_proxy;
-		source->close = client_close_proxy;
-		a->source = source;
-	}
+	filter = calloc(1, sizeof(*filter));
+	if (filter == NULL)
+		return (ARCHIVE_FATAL);
+	filter->bidder = NULL;
+	filter->upstream = NULL;
+	filter->archive = a;
+	filter->data = client_data;
+	filter->read = client_read_proxy;
+	filter->skip = client_skip_proxy;
+	filter->close = client_close_proxy;
+	a->filter = filter;
 
 	/* In case there's no filter. */
 	a->archive.compression_code = ARCHIVE_COMPRESSION_NONE;
@@ -214,60 +204,49 @@ archive_read_open2(struct archive *_a, v
 static int
 build_stream(struct archive_read *a)
 {
-	int number_readers, i, bid, best_bid;
-	struct archive_reader *reader, *best_reader;
-	struct archive_read_source *source;
-	const void *block;
-	ssize_t bytes_read;
-
-	/* Read first block now for compress format detection. */
-	bytes_read = (a->source->read)(a->source, &block);
-	if (bytes_read < 0) {
-		/* If the first read fails, close before returning error. */
-		if (a->source->close != NULL) {
-			(a->source->close)(a->source);
-			a->source = NULL;
-		}
-		/* source->read should have already set error information. */
-		return (ARCHIVE_FATAL);
-	}
+	int number_bidders, i, bid, best_bid;
+	struct archive_read_filter_bidder *bidder, *best_bidder;
+	struct archive_read_filter *filter;
+	int r;
 
-	number_readers = sizeof(a->readers) / sizeof(a->readers[0]);
+	for (;;) {
+		number_bidders = sizeof(a->bidders) / sizeof(a->bidders[0]);
 
-	best_bid = 0;
-	best_reader = NULL;
+		best_bid = 0;
+		best_bidder = NULL;
 
-	reader = a->readers;
-	for (i = 0, reader = a->readers; i < number_readers; i++, reader++) {
-		if (reader->bid != NULL) {
-			bid = (reader->bid)(reader, block, bytes_read);
-			if (bid > best_bid) {
-				best_bid = bid;
-				best_reader = reader;
+		bidder = a->bidders;
+		for (i = 0; i < number_bidders; i++, bidder++) {
+			if (bidder->bid != NULL) {
+				bid = (bidder->bid)(bidder, a->filter);
+				if (bid > best_bid) {
+					best_bid = bid;
+					best_bidder = bidder;
+				}
 			}
 		}
-	}
 
-	/*
-	 * If we have a winner, it becomes the next stage in the pipeline.
-	 */
-	if (best_reader != NULL) {
-		source = (best_reader->init)(a, best_reader, a->source,
-		    block, bytes_read);
-		if (source == NULL)
+		/* If no bidder, we're done. */
+		if (best_bidder == NULL) {
+			a->archive.compression_name = a->filter->name;
+			a->archive.compression_code = a->filter->code;
+			return (ARCHIVE_OK);
+		}
+
+		filter
+		    = (struct archive_read_filter *)calloc(1, sizeof(*filter));
+		if (filter == NULL)
 			return (ARCHIVE_FATAL);
-		/* Record the best decompressor for this stream. */
-		a->source = source;
-		/* Recurse to get next pipeline stage. */
-		return (build_stream(a));
+		filter->bidder = best_bidder;
+		filter->archive = a;
+		filter->upstream = a->filter;
+		r = (best_bidder->init)(filter);
+		if (r != ARCHIVE_OK) {
+			free(filter);
+			return (r);
+		}
+		a->filter = filter;
 	}
-
-	/* Save first block of data. */
-	a->client_buff = block;
-	a->client_total = bytes_read;
-	a->client_next = a->client_buff;
-	a->client_avail = a->client_total;
-	return (ARCHIVE_OK);
 }
 
 /*
@@ -594,20 +573,24 @@ archive_read_close(struct archive *_a)
 
 	/* TODO: Clean up the formatters. */
 
-	/* Clean up the stream pipeline. */
-	while (a->source != NULL) {
-		struct archive_read_source *t = a->source->upstream;
-		r1 = (a->source->close)(a->source);
-		if (r1 < r)
-			r = r1;
-		a->source = t;
+	/* Clean up the filter pipeline. */
+	while (a->filter != NULL) {
+		struct archive_read_filter *t = a->filter->upstream;
+		if (a->filter->close != NULL) {
+			r1 = (a->filter->close)(a->filter);
+			if (r1 < r)
+				r = r1;
+		}
+		free(a->filter->buffer);
+		free(a->filter);
+		a->filter = t;
 	}
 
-	/* Release the reader objects. */
-	n = sizeof(a->readers)/sizeof(a->readers[0]);
+	/* Release the bidder objects. */
+	n = sizeof(a->bidders)/sizeof(a->bidders[0]);
 	for (i = 0; i < n; i++) {
-		if (a->readers[i].free != NULL) {
-			r1 = (a->readers[i].free)(&a->readers[i]);
+		if (a->bidders[i].free != NULL) {
+			r1 = (a->bidders[i].free)(&a->bidders[i]);
 			if (r1 < r)
 				r = r1;
 		}
@@ -649,7 +632,6 @@ archive_read_finish(struct archive *_a)
 	if (a->entry)
 		archive_entry_free(a->entry);
 	a->archive.magic = 0;
-	free(a->buffer);
 	free(a);
 #if ARCHIVE_API_VERSION > 1
 	return (r);
@@ -699,20 +681,20 @@ __archive_read_register_format(struct ar
  * Used internally by decompression routines to register their bid and
  * initialization functions.
  */
-struct archive_reader *
-__archive_read_get_reader(struct archive_read *a)
+struct archive_read_filter_bidder *
+__archive_read_get_bidder(struct archive_read *a)
 {
 	int i, number_slots;
 
 	__archive_check_magic(&a->archive,
 	    ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
-	    "__archive_read_get_reader");
+	    "__archive_read_get_bidder");
 
-	number_slots = sizeof(a->readers) / sizeof(a->readers[0]);
+	number_slots = sizeof(a->bidders) / sizeof(a->bidders[0]);
 
 	for (i = 0; i < number_slots; i++) {
-		if (a->readers[i].bid == NULL)
-			return (a->readers + i);
+		if (a->bidders[i].bid == NULL)
+			return (a->bidders + i);
 	}
 
 	__archive_errx(1, "Not enough slots for compression registration");
@@ -725,7 +707,7 @@ __archive_read_get_reader(struct archive
  * flexible read-ahead and allows the I/O code to operate in a
  * zero-copy manner most of the time.
  *
- * In the ideal case, block providers give the I/O code blocks of data
+ * In the ideal case, filters generate blocks of data
  * and __archive_read_ahead() just returns pointers directly into
  * those blocks.  Then __archive_read_consume() just bumps those
  * pointers.  Only if your request would span blocks does the I/O
@@ -738,7 +720,7 @@ __archive_read_get_reader(struct archive
  *  * "I just want some data."  Ask for 1 byte and pay attention to
  *    the "number of bytes available" from __archive_read_ahead().
  *    You can consume more than you asked for; you just can't consume
- *    more than is available right now.  If you consume everything that's
+ *    more than is available.  If you consume everything that's
  *    immediately available, the next read_ahead() call will pull
  *    the next block.
  *  * "I want to output a large block of data."  As above, ask for 1 byte,
@@ -790,10 +772,17 @@ __archive_read_get_reader(struct archive
 const void *
 __archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail)
 {
+	return (__archive_read_filter_ahead(a->filter, min, avail));
+}
+
+const void *
+__archive_read_filter_ahead(struct archive_read_filter *filter,
+    size_t min, ssize_t *avail)
+{
 	ssize_t bytes_read;
 	size_t tocopy;
 
-	if (a->fatal) {
+	if (filter->fatal) {
 		if (avail)
 			*avail = ARCHIVE_FATAL;
 		return (NULL);
@@ -807,68 +796,68 @@ __archive_read_ahead(struct archive_read
 		/*
 		 * If we can satisfy from the copy buffer, we're done.
 		 */
-		if (a->avail >= min) {
+		if (filter->avail >= min) {
 			if (avail != NULL)
-				*avail = a->avail;
-			return (a->next);
+				*avail = filter->avail;
+			return (filter->next);
 		}
 
 		/*
 		 * We can satisfy directly from client buffer if everything
 		 * currently in the copy buffer is still in the client buffer.
 		 */
-		if (a->client_total >= a->client_avail + a->avail
-		    && a->client_avail + a->avail >= min) {
+		if (filter->client_total >= filter->client_avail + filter->avail
+		    && filter->client_avail + filter->avail >= min) {
 			/* "Roll back" to client buffer. */
-			a->client_avail += a->avail;
-			a->client_next -= a->avail;
+			filter->client_avail += filter->avail;
+			filter->client_next -= filter->avail;
 			/* Copy buffer is now empty. */
-			a->avail = 0;
-			a->next = a->buffer;
+			filter->avail = 0;
+			filter->next = filter->buffer;
 			/* Return data from client buffer. */
 			if (avail != NULL)
-				*avail = a->client_avail;
-			return (a->client_next);
+				*avail = filter->client_avail;
+			return (filter->client_next);
 		}
 
 		/* Move data forward in copy buffer if necessary. */
-		if (a->next > a->buffer &&
-		    a->next + min > a->buffer + a->buffer_size) {
-			if (a->avail > 0)
-				memmove(a->buffer, a->next, a->avail);
-			a->next = a->buffer;
+		if (filter->next > filter->buffer &&
+		    filter->next + min > filter->buffer + filter->buffer_size) {
+			if (filter->avail > 0)
+				memmove(filter->buffer, filter->next, filter->avail);
+			filter->next = filter->buffer;
 		}
 
 		/* If we've used up the client data, get more. */
-		if (a->client_avail <= 0) {
-			if (a->end_of_file) {
+		if (filter->client_avail <= 0) {
+			if (filter->end_of_file) {
 				if (avail != NULL)
 					*avail = 0;
 				return (NULL);
 			}
-			bytes_read = (a->source->read)(a->source,
-			    &a->client_buff);
+			bytes_read = (filter->read)(filter,
+			    &filter->client_buff);
 			if (bytes_read < 0) {		/* Read error. */
-				a->client_total = a->client_avail = 0;
-				a->client_next = a->client_buff = NULL;
-				a->fatal = 1;
+				filter->client_total = filter->client_avail = 0;
+				filter->client_next = filter->client_buff = NULL;
+				filter->fatal = 1;
 				if (avail != NULL)
 					*avail = ARCHIVE_FATAL;
 				return (NULL);
 			}
 			if (bytes_read == 0) {	/* Premature end-of-file. */
-				a->client_total = a->client_avail = 0;
-				a->client_next = a->client_buff = NULL;
-				a->end_of_file = 1;
+				filter->client_total = filter->client_avail = 0;
+				filter->client_next = filter->client_buff = NULL;
+				filter->end_of_file = 1;
 				/* Return whatever we do have. */
 				if (avail != NULL)
-					*avail = a->avail;
+					*avail = filter->avail;
 				return (NULL);
 			}
-			a->archive.raw_position += bytes_read;
-			a->client_total = bytes_read;
-			a->client_avail = a->client_total;
-			a->client_next = a->client_buff;
+			filter->position += bytes_read;
+			filter->client_total = bytes_read;
+			filter->client_avail = filter->client_total;
+			filter->client_next = filter->client_buff;
 		}
 		else
 		{
@@ -880,19 +869,22 @@ __archive_read_ahead(struct archive_read
 			 */
 
 			/* Ensure the buffer is big enough. */
-			if (min > a->buffer_size) {
+			if (min > filter->buffer_size) {
 				size_t s, t;
 				char *p;
 
 				/* Double the buffer; watch for overflow. */
-				s = t = a->buffer_size;
+				s = t = filter->buffer_size;
+				if (s == 0)
+					s = min;
 				while (s < min) {
 					t *= 2;
 					if (t <= s) { /* Integer overflow! */
-						archive_set_error(&a->archive,
-						    ENOMEM,
+						archive_set_error(
+							&filter->archive->archive,
+							ENOMEM,
 						    "Unable to allocate copy buffer");
-						a->fatal = 1;
+						filter->fatal = 1;
 						if (avail != NULL)
 							*avail = ARCHIVE_FATAL;
 						return (NULL);
@@ -902,39 +894,41 @@ __archive_read_ahead(struct archive_read
 				/* Now s >= min, so allocate a new buffer. */
 				p = (char *)malloc(s);
 				if (p == NULL) {
-					archive_set_error(&a->archive, ENOMEM,
+					archive_set_error(
+						&filter->archive->archive,
+						ENOMEM,
 					    "Unable to allocate copy buffer");
-					a->fatal = 1;
+					filter->fatal = 1;
 					if (avail != NULL)
 						*avail = ARCHIVE_FATAL;
 					return (NULL);
 				}
 				/* Move data into newly-enlarged buffer. */
-				if (a->avail > 0)
-					memmove(p, a->next, a->avail);
-				free(a->buffer);
-				a->next = a->buffer = p;
-				a->buffer_size = s;
+				if (filter->avail > 0)
+					memmove(p, filter->next, filter->avail);
+				free(filter->buffer);
+				filter->next = filter->buffer = p;
+				filter->buffer_size = s;
 			}
 
 			/* We can add client data to copy buffer. */
 			/* First estimate: copy to fill rest of buffer. */
-			tocopy = (a->buffer + a->buffer_size)
-			    - (a->next + a->avail);
+			tocopy = (filter->buffer + filter->buffer_size)
+			    - (filter->next + filter->avail);
 			/* Don't waste time buffering more than we need to. */
-			if (tocopy + a->avail > min)
-				tocopy = min - a->avail;
+			if (tocopy + filter->avail > min)
+				tocopy = min - filter->avail;
 			/* Don't copy more than is available. */
-			if (tocopy > a->client_avail)
-				tocopy = a->client_avail;
+			if (tocopy > filter->client_avail)
+				tocopy = filter->client_avail;
 
-			memcpy(a->next + a->avail, a->client_next,
+			memcpy(filter->next + filter->avail, filter->client_next,
 			    tocopy);
 			/* Remove this data from client buffer. */
-			a->client_next += tocopy;
-			a->client_avail -= tocopy;
+			filter->client_next += tocopy;
+			filter->client_avail -= tocopy;
 			/* add it to copy buffer. */
-			a->avail += tocopy;
+			filter->avail += tocopy;
 		}
 	}
 }
@@ -953,16 +947,25 @@ __archive_read_ahead(struct archive_read
 ssize_t
 __archive_read_consume(struct archive_read *a, size_t request)
 {
-	if (a->avail > 0) {
+	ssize_t r;
+	r = __archive_read_filter_consume(a->filter, request);
+	a->archive.file_position += r;
+	return (r);
+}
+
+ssize_t
+__archive_read_filter_consume(struct archive_read_filter * filter,
+    size_t request)
+{
+	if (filter->avail > 0) {
 		/* Read came from copy buffer. */
-		a->next += request;
-		a->avail -= request;
+		filter->next += request;
+		filter->avail -= request;
 	} else {
 		/* Read came from client buffer. */
-		a->client_next += request;
-		a->client_avail -= request;
+		filter->client_next += request;
+		filter->client_avail -= request;
 	}
-	a->archive.file_position += request;
 	return (request);
 }
 
@@ -976,23 +979,29 @@ __archive_read_consume(struct archive_re
 int64_t
 __archive_read_skip(struct archive_read *a, int64_t request)
 {
+	return (__archive_read_filter_skip(a->filter, request));
+}
+
+int64_t
+__archive_read_filter_skip(struct archive_read_filter *filter, int64_t request)
+{
 	off_t bytes_skipped, total_bytes_skipped = 0;
 	size_t min;
 
-	if (a->fatal)
+	if (filter->fatal)
 		return (-1);
 	/*
 	 * If there is data in the buffers already, use that first.
 	 */
-	if (a->avail > 0) {
-		min = minimum(request, (off_t)a->avail);
-		bytes_skipped = __archive_read_consume(a, min);
+	if (filter->avail > 0) {
+		min = minimum(request, (off_t)filter->avail);
+		bytes_skipped = __archive_read_consume(filter->archive, min);
 		request -= bytes_skipped;
 		total_bytes_skipped += bytes_skipped;
 	}
-	if (a->client_avail > 0) {
-		min = minimum(request, (off_t)a->client_avail);
-		bytes_skipped = __archive_read_consume(a, min);
+	if (filter->client_avail > 0) {
+		min = minimum(request, (off_t)filter->client_avail);
+		bytes_skipped = __archive_read_consume(filter->archive, min);
 		request -= bytes_skipped;
 		total_bytes_skipped += bytes_skipped;
 	}
@@ -1002,23 +1011,22 @@ __archive_read_skip(struct archive_read 
 	 * If a client_skipper was provided, try that first.
 	 */
 #if ARCHIVE_API_VERSION < 2
-	if ((a->source->skip != NULL) && (request < SSIZE_MAX)) {
+	if ((filter->skip != NULL) && (request < SSIZE_MAX)) {
 #else
-	if (a->source->skip != NULL) {
+	if (filter->skip != NULL) {
 #endif
-		bytes_skipped = (a->source->skip)(a->source, request);
+		bytes_skipped = (filter->skip)(filter, request);
 		if (bytes_skipped < 0) {	/* error */
-			a->client_total = a->client_avail = 0;
-			a->client_next = a->client_buff = NULL;
-			a->fatal = 1;
+			filter->client_total = filter->client_avail = 0;
+			filter->client_next = filter->client_buff = NULL;
+			filter->fatal = 1;
 			return (bytes_skipped);
 		}
+		filter->archive->archive.file_position += bytes_skipped;
 		total_bytes_skipped += bytes_skipped;
-		a->archive.file_position += bytes_skipped;
 		request -= bytes_skipped;
-		a->client_next = a->client_buff;
-		a->archive.raw_position += bytes_skipped;
-		a->client_avail = a->client_total = 0;
+		filter->client_next = filter->client_buff;
+		filter->client_avail = filter->client_total = 0;
 	}
 	/*
 	 * Note that client_skipper will usually not satisfy the
@@ -1029,18 +1037,20 @@ __archive_read_skip(struct archive_read 
 	while (request > 0) {
 		const void* dummy_buffer;
 		ssize_t bytes_read;
-		dummy_buffer = __archive_read_ahead(a, 1, &bytes_read);
+		dummy_buffer = __archive_read_ahead(filter->archive,
+		    1, &bytes_read);
 		if (bytes_read < 0)
 			return (bytes_read);
 		if (bytes_read == 0) {
 			/* We hit EOF before we satisfied the skip request. */
-			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+			archive_set_error(&filter->archive->archive,
+			    ARCHIVE_ERRNO_MISC,
 			    "Truncated input file (need to skip %jd bytes)",
 			    (intmax_t)request);
 			return (ARCHIVE_FATAL);
 		}
 		min = (size_t)(minimum(bytes_read, request));
-		bytes_read = __archive_read_consume(a, min);
+		bytes_read = __archive_read_consume(filter->archive, min);
 		total_bytes_skipped += bytes_read;
 		request -= bytes_read;
 	}

Modified: head/lib/libarchive/archive_read_private.h
==============================================================================
--- head/lib/libarchive/archive_read_private.h	Thu Mar  5 01:59:49 2009	(r189388)
+++ head/lib/libarchive/archive_read_private.h	Thu Mar  5 02:19:42 2009	(r189389)
@@ -33,72 +33,80 @@
 #include "archive_private.h"
 
 struct archive_read;
-struct archive_reader;
-struct archive_read_source;
+struct archive_read_filter_bidder;
+struct archive_read_filter;
 
 /*
- * A "reader" knows how to provide blocks.  That can include something
- * that reads blocks from disk or socket or a transformation layer
- * that reads blocks from another source and transforms them.  This
- * includes decompression and decryption filters.
- *
- * How bidding works:
+ * How bidding works for filters:
  *   * The bid manager reads the first block from the current source.
  *   * It shows that block to each registered bidder.
- *   * The winning bidder is initialized (with the block and information
- *     about the source)
- *   * The winning bidder becomes the new source and the process repeats
- * This ends only when no reader provides a non-zero bid.
+ *   * The bid manager creates a new filter structure for the winning
+ *     bidder and gives the winning bidder a chance to initialize it.
+ *   * The new filter becomes the top filter in the archive_read structure
+ *     and we repeat the process.
+ * This ends only when no bidder provides a non-zero bid.
  */
-struct archive_reader {
-	/* Configuration data for the reader. */
+struct archive_read_filter_bidder {
+	/* Configuration data for the bidder. */
 	void *data;
-	/* Bidder is handed the initial block from its source. */
-	int (*bid)(struct archive_reader *, const void *buff, size_t);
-	/* Init() is given the archive, upstream source, and the initial
-	 * block above.  It returns a populated source structure. */
-	struct archive_read_source *(*init)(struct archive_read *,
-	    struct archive_reader *, struct archive_read_source *source,
-	    const void *, size_t);
-	/* Release the reader and any configuration data it allocated. */
-	int (*free)(struct archive_reader *);
+	/* Taste the upstream filter to see if we handle this. */
+	int (*bid)(struct archive_read_filter_bidder *,
+	    struct archive_read_filter *);
+	/* Initialize a newly-created filter. */
+	int (*init)(struct archive_read_filter *);
+	/* Release the bidder's configuration data. */
+	int (*free)(struct archive_read_filter_bidder *);
 };
 
 /*
- * A "source" is an instance of a reader.  This structure is
- * allocated and initialized by the init() method of a reader
- * above.
+ * This structure is allocated within the archive_read core
+ * and initialized by archive_read and the init() method of the
+ * corresponding bidder above.
  */
-struct archive_read_source {
-	/* Essentially all sources will need these values, so
+struct archive_read_filter {
+	/* Essentially all filters will need these values, so
 	 * just declare them here. */
-	struct archive_reader *reader; /* Reader that I'm an instance of. */
-	struct archive_read_source *upstream; /* Who I get blocks from. */
-	struct archive_read *archive; /* associated archive. */
+	struct archive_read_filter_bidder *bidder; /* My bidder. */
+	struct archive_read_filter *upstream; /* Who I read from. */
+	struct archive_read *archive; /* Associated archive. */
 	/* Return next block. */
-	ssize_t (*read)(struct archive_read_source *, const void **);
+	ssize_t (*read)(struct archive_read_filter *, const void **);
 	/* Skip forward this many bytes. */
-	int64_t (*skip)(struct archive_read_source *self, int64_t request);
-	/* Close (recursively) and free(self). */
-	int (*close)(struct archive_read_source *self);
+	int64_t (*skip)(struct archive_read_filter *self, int64_t request);
+	/* Close (just this filter); the archive_read core frees self. */
+	int (*close)(struct archive_read_filter *self);
 	/* My private data. */
 	void *data;
+
+	const char	*name;
+	int		 code;
+
+	/* Used by reblocking logic. */
+	char		*buffer;
+	size_t		 buffer_size;
+	char		*next;		/* Current read location. */
+	size_t		 avail;		/* Bytes in my buffer. */
+	const void	*client_buff;	/* Client buffer information. */
+	size_t		 client_total;
+	const char	*client_next;
+	size_t		 client_avail;
+	int64_t		 position;
+	char		 end_of_file;
+	char		 fatal;
 };
 
 /*
- * The client source is almost the same as an internal source.
+ * The client looks a lot like a filter, so we just wrap it here.
  *
- * TODO: Make archive_read_source and archive_read_client identical so
+ * TODO: Make archive_read_filter and archive_read_client identical so
  * that users of the library can easily register their own
  * transformation filters.  This will probably break the API/ABI and
- * so should be deferred until libarchive 3.0.
+ * so should be deferred at least until libarchive 3.0.
  */
 struct archive_read_client {
-	archive_open_callback	*opener;
 	archive_read_callback	*reader;
 	archive_skip_callback	*skipper;
 	archive_close_callback	*closer;
-	void			*data;
 };
 
 struct archive_read {
@@ -122,28 +130,15 @@ struct archive_read {
 	/* Callbacks to open/read/write/close client archive stream. */
 	struct archive_read_client client;
 
-	/* Registered readers. */
-	struct archive_reader readers[8];
+	/* Registered filter bidders. */
+	struct archive_read_filter_bidder bidders[8];
 
-	/* Source */
-	struct archive_read_source *source;
+	/* Last filter in chain */
+	struct archive_read_filter *filter;
 
 	/* File offset of beginning of most recently-read header. */
 	off_t		  header_position;
 
-
-	/* Used by reblocking logic. */
-	char		*buffer;
-	size_t		 buffer_size;
-	char		*next;		/* Current read location. */
-	size_t		 avail;		/* Bytes in my buffer. */
-	const void	*client_buff;	/* Client buffer information. */
-	size_t		 client_total;
-	const char	*client_next;
-	size_t		 client_avail;
-	char		 end_of_file;
-	char		 fatal;
-
 	/*
 	 * Format detection is mostly the same as compression
 	 * detection, with one significant difference: The bidders
@@ -177,13 +172,14 @@ int	__archive_read_register_format(struc
 	    int (*read_data_skip)(struct archive_read *),
 	    int (*cleanup)(struct archive_read *));
 
-struct archive_reader
-	*__archive_read_get_reader(struct archive_read *a);
+struct archive_read_filter_bidder
+	*__archive_read_get_bidder(struct archive_read *a);
 
-const void
-	*__archive_read_ahead(struct archive_read *, size_t, ssize_t *);
-ssize_t
-	__archive_read_consume(struct archive_read *, size_t);
-int64_t
-	__archive_read_skip(struct archive_read *, int64_t);
+const void *__archive_read_ahead(struct archive_read *, size_t, ssize_t *);
+const void *__archive_read_filter_ahead(struct archive_read_filter *,
+    size_t, ssize_t *);
+ssize_t	__archive_read_consume(struct archive_read *, size_t);
+ssize_t	__archive_read_filter_consume(struct archive_read_filter *, size_t);
+int64_t	__archive_read_skip(struct archive_read *, int64_t);
+int64_t	__archive_read_filter_skip(struct archive_read_filter *, int64_t);
 #endif

Modified: head/lib/libarchive/archive_read_support_compression_bzip2.c
==============================================================================
--- head/lib/libarchive/archive_read_support_compression_bzip2.c	Thu Mar  5 01:59:49 2009	(r189388)
+++ head/lib/libarchive/archive_read_support_compression_bzip2.c	Thu Mar  5 02:19:42 2009	(r189389)
@@ -57,9 +57,9 @@ struct private_data {
 	char		 eof; /* True = found end of compressed data. */
 };
 
-/* Bzip2 source */
-static ssize_t	bzip2_source_read(struct archive_read_source *, const void **);
-static int	bzip2_source_close(struct archive_read_source *);
+/* Bzip2 filter */
+static ssize_t	bzip2_filter_read(struct archive_read_filter *, const void **);
+static int	bzip2_filter_close(struct archive_read_filter *);
 #endif
 
 /*
@@ -68,17 +68,15 @@ static int	bzip2_source_close(struct arc
  * error messages.)  So the bid framework here gets compiled even
  * if bzlib is unavailable.
  */
-static int	bzip2_reader_bid(struct archive_reader *, const void *, size_t);
-static struct archive_read_source *bzip2_reader_init(struct archive_read *,
-    struct archive_reader *, struct archive_read_source *,
-    const void *, size_t);
-static int	bzip2_reader_free(struct archive_reader *);
+static int	bzip2_reader_bid(struct archive_read_filter_bidder *, struct archive_read_filter *);
+static int	bzip2_reader_init(struct archive_read_filter *);
+static int	bzip2_reader_free(struct archive_read_filter_bidder *);
 
 int
 archive_read_support_compression_bzip2(struct archive *_a)
 {
 	struct archive_read *a = (struct archive_read *)_a;
-	struct archive_reader *reader = __archive_read_get_reader(a);
+	struct archive_read_filter_bidder *reader = __archive_read_get_bidder(a);
 
 	if (reader == NULL)
 		return (ARCHIVE_FATAL);
@@ -91,7 +89,7 @@ archive_read_support_compression_bzip2(s
 }
 
 static int
-bzip2_reader_free(struct archive_reader *self){
+bzip2_reader_free(struct archive_read_filter_bidder *self){
 	(void)self; /* UNUSED */
 	return (ARCHIVE_OK);
 }
@@ -104,61 +102,38 @@ bzip2_reader_free(struct archive_reader 
  * from verifying as much as we would like.
  */
 static int
-bzip2_reader_bid(struct archive_reader *self, const void *buff, size_t len)
+bzip2_reader_bid(struct archive_read_filter_bidder *self, struct archive_read_filter *filter)
 {
 	const unsigned char *buffer;
+	size_t avail;
 	int bits_checked;
 
 	(void)self; /* UNUSED */
 
-	if (len < 1)
+	/* Minimal bzip2 archive is 14 bytes. */
+	buffer = __archive_read_filter_ahead(filter, 14, &avail);
+	if (buffer == NULL)
 		return (0);
 
-	buffer = (const unsigned char *)buff;
+	/* First three bytes must be "BZh" */
 	bits_checked = 0;
-	if (buffer[0] != 'B')	/* Verify first ID byte. */
+	if (buffer[0] != 'B' || buffer[1] != 'Z' || buffer[2] != 'h')
 		return (0);
-	bits_checked += 8;
-	if (len < 2)
-		return (bits_checked);
-
-	if (buffer[1] != 'Z')	/* Verify second ID byte. */
-		return (0);
-	bits_checked += 8;
-	if (len < 3)
-		return (bits_checked);
-
-	if (buffer[2] != 'h')	/* Verify third ID byte. */
-		return (0);
-	bits_checked += 8;
-	if (len < 4)
-		return (bits_checked);
+	bits_checked += 24;
 
+	/* Next follows a compression flag which must be an ASCII digit. */
 	if (buffer[3] < '1' || buffer[3] > '9')
 		return (0);
 	bits_checked += 5;
-	if (len < 5)
-		return (bits_checked);
 
 	/* After BZh[1-9], there must be either a data block
 	 * which begins with 0x314159265359 or an end-of-data
 	 * marker of 0x177245385090. */
-
-	if (buffer[4] == 0x31) {
-		/* Verify the data block signature. */
-		size_t s = len;
-		if (s > 10) s = 10;
-		if (memcmp(buffer + 4, "\x31\x41\x59\x26\x53\x59", s - 4) != 0)
-			return (0);
-		bits_checked += 8 * (s - 4);
-	} else if (buffer[4] == 0x17) {
-		/* Verify the end-of-data marker. */
-		size_t s = len;
-		if (s > 10) s = 10;
-		if (memcmp(buffer + 4, "\x17\x72\x45\x38\x50\x90", s - 4) != 0)
-			return (0);
-		bits_checked += 8 * (s - 4);
-	} else
+	if (memcmp(buffer + 4, "\x31\x41\x59\x26\x53\x59", 6) == 0)
+		bits_checked += 48;
+	else if (memcmp(buffer + 4, "\x17\x72\x45\x38\x50\x90", 6) == 0)
+		bits_checked += 48;
+	else
 		return (0);
 
 	return (bits_checked);
@@ -171,19 +146,13 @@ bzip2_reader_bid(struct archive_reader *
  * decompression.  We can, however, still detect compressed archives
  * and emit a useful message.
  */
-static struct archive_read_source *
-bzip2_reader_init(struct archive_read *a, struct archive_reader *reader,
-    struct archive_read_source *upstream, const void *buff, size_t n)
+static int
+bzip2_reader_init(struct archive_read_filter *self)
 {
-	(void)a;	/* UNUSED */
-	(void)reader;	/* UNUSED */
-	(void)upstream; /* UNUSED */
-	(void)buff;	/* UNUSED */
-	(void)n;	/* UNUSED */
 
-	archive_set_error(&a->archive, -1,
+	archive_set_error(&self->archive->archive, -1,
 	    "This version of libarchive was compiled without bzip2 support");
-	return (NULL);
+	return (ARCHIVE_FATAL);
 }
 
 
@@ -192,67 +161,45 @@ bzip2_reader_init(struct archive_read *a
 /*
  * Setup the callbacks.
  */
-static struct archive_read_source *
-bzip2_reader_init(struct archive_read *a, struct archive_reader *reader,
-    struct archive_read_source *upstream, const void *buff, size_t n)
+static int
+bzip2_reader_init(struct archive_read_filter *self)
 {
 	static const size_t out_block_size = 64 * 1024;
 	void *out_block;
-	struct archive_read_source *self;
 	struct private_data *state;
 
-	(void)reader; /* UNUSED */
+	self->code = ARCHIVE_COMPRESSION_BZIP2;
+	self->name = "bzip2";
 
-	a->archive.compression_code = ARCHIVE_COMPRESSION_BZIP2;
-	a->archive.compression_name = "bzip2";
-
-	self = calloc(sizeof(*self), 1);
 	state = (struct private_data *)calloc(sizeof(*state), 1);
 	out_block = (unsigned char *)malloc(out_block_size);
 	if (self == NULL || state == NULL || out_block == NULL) {
-		archive_set_error(&a->archive, ENOMEM,
-		    "Can't allocate data for %s decompression",
-		    a->archive.compression_name);
+		archive_set_error(&self->archive->archive, ENOMEM,
+		    "Can't allocate data for bzip2 decompression");
 		free(out_block);
 		free(state);
-		free(self);
-		return (NULL);
+		return (ARCHIVE_FATAL);
 	}
 
-
-	self->archive = a;
 	self->data = state;
 	state->out_block_size = out_block_size;
 	state->out_block = out_block;
-	self->upstream = upstream;
-	self->read = bzip2_source_read;
+	self->read = bzip2_filter_read;
 	self->skip = NULL; /* not supported */
-	self->close = bzip2_source_close;
-
-	/*
-	 * A bug in bzlib.h: stream.next_in should be marked 'const'
-	 * but isn't (the library never alters data through the

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***