svn commit: r185679 - head/lib/libarchive
Tim Kientzle
kientzle at FreeBSD.org
Fri Dec 5 22:45:16 PST 2008
Author: kientzle
Date: Sat Dec 6 06:45:15 2008
New Revision: 185679
URL: http://svn.freebsd.org/changeset/base/185679
Log:
MfP4: Big read filter refactoring.
This is an attempt to eliminate a lot of redundant
code from the read ("decompression") filters by
changing them to juggle arbitrary-sized blocks
and consolidating the reblocking code at a single point
in archive_read.c.
Along the way, I've changed the internal read/consume
API used by the format handlers to a slightly
different style originally suggested by des@. It
does seem to simplify a lot of common cases.
The most dramatic change is, of course, to
archive_read_support_compression_none(), which
has just evaporated into a no-op as the blocking
code this used to hold has all been moved up
a level.
There's at least one more big round of refactoring
yet to come before the individual filters are as
straightforward as I think they should be...
Modified:
head/lib/libarchive/archive_read.c
head/lib/libarchive/archive_read_private.h
head/lib/libarchive/archive_read_support_compression_all.c
head/lib/libarchive/archive_read_support_compression_bzip2.c
head/lib/libarchive/archive_read_support_compression_compress.c
head/lib/libarchive/archive_read_support_compression_gzip.c
head/lib/libarchive/archive_read_support_compression_none.c
head/lib/libarchive/archive_read_support_compression_program.c
head/lib/libarchive/archive_read_support_format_ar.c
head/lib/libarchive/archive_read_support_format_cpio.c
head/lib/libarchive/archive_read_support_format_empty.c
head/lib/libarchive/archive_read_support_format_iso9660.c
head/lib/libarchive/archive_read_support_format_mtree.c
head/lib/libarchive/archive_read_support_format_tar.c
head/lib/libarchive/archive_read_support_format_zip.c
Modified: head/lib/libarchive/archive_read.c
==============================================================================
--- head/lib/libarchive/archive_read.c Sat Dec 6 06:23:37 2008 (r185678)
+++ head/lib/libarchive/archive_read.c Sat Dec 6 06:45:15 2008 (r185679)
@@ -53,9 +53,10 @@ __FBSDID("$FreeBSD$");
#include "archive_private.h"
#include "archive_read_private.h"
-static void choose_decompressor(struct archive_read *, const void*, size_t);
+#define minimum(a, b) (a < b ? a : b)
+
+static int build_stream(struct archive_read *);
static int choose_format(struct archive_read *);
-static off_t dummy_skip(struct archive_read *, off_t);
/*
* Allocate, initialize and return a struct archive object.
@@ -74,8 +75,15 @@ archive_read_new(void)
a->archive.state = ARCHIVE_STATE_NEW;
a->entry = archive_entry_new();
- /* We always support uncompressed archives. */
- archive_read_support_compression_none(&a->archive);
+ /* Initialize reblocking logic. */
+ a->buffer_size = 64 * 1024; /* 64k */
+ a->buffer = (char *)malloc(a->buffer_size);
+ a->next = a->buffer;
+ if (a->buffer == NULL) {
+ archive_entry_free(a->entry);
+ free(a);
+ return (NULL);
+ }
return (&a->archive);
}
@@ -108,6 +116,33 @@ archive_read_open(struct archive *a, voi
client_reader, NULL, client_closer);
}
+static ssize_t
+client_read_proxy(struct archive_read_source *self, const void **buff)
+{
+ return (self->archive->client.reader)((struct archive *)self->archive,
+ self->data, buff);
+}
+
+static int64_t
+client_skip_proxy(struct archive_read_source *self, int64_t request)
+{
+ return (self->archive->client.skipper)((struct archive *)self->archive,
+ self->data, request);
+}
+
+static int
+client_close_proxy(struct archive_read_source *self)
+{
+ int r = ARCHIVE_OK;
+
+ if (self->archive->client.closer != NULL)
+ r = (self->archive->client.closer)((struct archive *)self->archive,
+ self->data);
+ free(self);
+ return (r);
+}
+
+
int
archive_read_open2(struct archive *_a, void *client_data,
archive_open_callback *client_opener,
@@ -116,28 +151,15 @@ archive_read_open2(struct archive *_a, v
archive_close_callback *client_closer)
{
struct archive_read *a = (struct archive_read *)_a;
- const void *buffer;
- ssize_t bytes_read;
int e;
- __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_open");
+ __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
+ "archive_read_open");
if (client_reader == NULL)
__archive_errx(1,
"No reader function provided to archive_read_open");
- /*
- * Set these NULL initially. If the open or initial read fails,
- * we'll leave them NULL to indicate that the file is invalid.
- * (In particular, this helps ensure that the closer doesn't
- * get called more than once.)
- */
- a->client_opener = NULL;
- a->client_reader = NULL;
- a->client_skipper = NULL;
- a->client_closer = NULL;
- a->client_data = NULL;
-
/* Open data source. */
if (client_opener != NULL) {
e =(client_opener)(&a->archive, client_data);
@@ -149,129 +171,103 @@ archive_read_open2(struct archive *_a, v
}
}
- /* Read first block now for compress format detection. */
- bytes_read = (client_reader)(&a->archive, client_data, &buffer);
-
- if (bytes_read < 0) {
- /* If the first read fails, close before returning error. */
- if (client_closer)
- (client_closer)(&a->archive, client_data);
- /* client_reader should have already set error information. */
- return (ARCHIVE_FATAL);
- }
+ /* Save the client functions and mock up the initial source. */
+ a->client.opener = client_opener; /* Do we need to remember this? */
+ a->client.reader = client_reader;
+ a->client.skipper = client_skipper;
+ a->client.closer = client_closer;
+ a->client.data = client_data;
- /* Now that the client callbacks have worked, remember them. */
- a->client_opener = client_opener; /* Do we need to remember this? */
- a->client_reader = client_reader;
- a->client_skipper = client_skipper;
- a->client_closer = client_closer;
- a->client_data = client_data;
-
- /* Select a decompression routine. */
- choose_decompressor(a, buffer, (size_t)bytes_read);
- if (a->decompressor == NULL)
- return (ARCHIVE_FATAL);
+ {
+ struct archive_read_source *source;
- /* Initialize decompression routine with the first block of data. */
- e = (a->decompressor->init)(a, buffer, (size_t)bytes_read);
+ source = calloc(1, sizeof(*source));
+ if (source == NULL)
+ return (ARCHIVE_FATAL);
+ source->reader = NULL;
+ source->upstream = NULL;
+ source->archive = a;
+ source->data = client_data;
+ source->read = client_read_proxy;
+ source->skip = client_skip_proxy;
+ source->close = client_close_proxy;
+ a->source = source;
+ }
+
+ /* In case there's no filter. */
+ a->archive.compression_code = ARCHIVE_COMPRESSION_NONE;
+ a->archive.compression_name = "none";
+ /* Build out the input pipeline. */
+ e = build_stream(a);
if (e == ARCHIVE_OK)
a->archive.state = ARCHIVE_STATE_HEADER;
- /*
- * If the decompressor didn't register a skip function, provide a
- * dummy compression-layer skip function.
- */
- if (a->decompressor->skip == NULL)
- a->decompressor->skip = dummy_skip;
-
return (e);
}
/*
- * Allow each registered decompression routine to bid on whether it
- * wants to handle this stream. Return index of winning bidder.
+ * Allow each registered stream transform to bid on whether
+ * it wants to handle this stream. Repeat until we've finished
+ * building the pipeline.
*/
-static void
-choose_decompressor(struct archive_read *a,
- const void *buffer, size_t bytes_read)
+static int
+build_stream(struct archive_read *a)
{
- int decompression_slots, i, bid, best_bid;
- struct decompressor_t *decompressor, *best_decompressor;
+ int number_readers, i, bid, best_bid;
+ struct archive_reader *reader, *best_reader;
+ struct archive_read_source *source;
+ const void *block;
+ ssize_t bytes_read;
- decompression_slots = sizeof(a->decompressors) /
- sizeof(a->decompressors[0]);
+ /* Read first block now for compress format detection. */
+ bytes_read = (a->source->read)(a->source, &block);
+ if (bytes_read < 0) {
+ /* If the first read fails, close before returning error. */
+ if (a->source->close != NULL) {
+ (a->source->close)(a->source);
+ a->source = NULL;
+ }
+ /* source->read should have already set error information. */
+ return (ARCHIVE_FATAL);
+ }
+
+ number_readers = sizeof(a->readers) / sizeof(a->readers[0]);
best_bid = 0;
- a->decompressor = NULL;
- best_decompressor = NULL;
+ best_reader = NULL;
- decompressor = a->decompressors;
- for (i = 0; i < decompression_slots; i++) {
- if (decompressor->bid) {
- bid = (decompressor->bid)(buffer, bytes_read);
- if (bid > best_bid || best_decompressor == NULL) {
+ reader = a->readers;
+ for (i = 0, reader = a->readers; i < number_readers; i++, reader++) {
+ if (reader->bid != NULL) {
+ bid = (reader->bid)(reader, block, bytes_read);
+ if (bid > best_bid) {
best_bid = bid;
- best_decompressor = decompressor;
+ best_reader = reader;
}
}
- decompressor ++;
}
/*
- * There were no bidders; this is a serious programmer error
- * and demands a quick and definitive abort.
- */
- if (best_decompressor == NULL)
- __archive_errx(1, "No decompressors were registered; you "
- "must call at least one "
- "archive_read_support_compression_XXX function in order "
- "to successfully read an archive.");
-
- /*
- * There were bidders, but no non-zero bids; this means we can't
- * support this stream.
+ * If we have a winner, it becomes the next stage in the pipeline.
*/
- if (best_bid < 1) {
- archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
- "Unrecognized archive format");
- return;
- }
-
- /* Record the best decompressor for this stream. */
- a->decompressor = best_decompressor;
-}
-
-/*
- * Dummy skip function, for use if the compression layer doesn't provide
- * one: This code just reads data and discards it.
- */
-static off_t
-dummy_skip(struct archive_read * a, off_t request)
-{
- const void * dummy_buffer;
- ssize_t bytes_read;
- off_t bytes_skipped;
-
- for (bytes_skipped = 0; request > 0;) {
- bytes_read = (a->decompressor->read_ahead)(a, &dummy_buffer, 1);
- if (bytes_read < 0)
- return (bytes_read);
- if (bytes_read == 0) {
- /* Premature EOF. */
- archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
- "Truncated input file (need to skip %jd bytes)",
- (intmax_t)request);
+ if (best_reader != NULL) {
+ source = (best_reader->init)(a, best_reader, a->source,
+ block, bytes_read);
+ if (source == NULL)
return (ARCHIVE_FATAL);
- }
- if (bytes_read > request)
- bytes_read = (ssize_t)request;
- (a->decompressor->consume)(a, (size_t)bytes_read);
- request -= bytes_read;
- bytes_skipped += bytes_read;
+ /* Record the best decompressor for this stream. */
+ a->source = source;
+ /* Recurse to get next pipeline stage. */
+ return (build_stream(a));
}
- return (bytes_skipped);
+ /* Save first block of data. */
+ a->client_buff = block;
+ a->client_total = bytes_read;
+ a->client_next = a->client_buff;
+ a->client_avail = a->client_total;
+ return (ARCHIVE_OK);
}
/*
@@ -598,23 +594,24 @@ archive_read_close(struct archive *_a)
/* TODO: Clean up the formatters. */
- /* Clean up the decompressors. */
- n = sizeof(a->decompressors)/sizeof(a->decompressors[0]);
+ /* Clean up the stream pipeline. */
+ if (a->source != NULL) {
+ r1 = (a->source->close)(a->source);
+ if (r1 < r)
+ r = r1;
+ a->source = NULL;
+ }
+
+ /* Release the reader objects. */
+ n = sizeof(a->readers)/sizeof(a->readers[0]);
for (i = 0; i < n; i++) {
- if (a->decompressors[i].finish != NULL) {
- r1 = (a->decompressors[i].finish)(a);
+ if (a->readers[i].free != NULL) {
+ r1 = (a->readers[i].free)(&a->readers[i]);
if (r1 < r)
r = r1;
}
}
- /* Close the client stream. */
- if (a->client_closer != NULL) {
- r1 = ((a->client_closer)(&a->archive, a->client_data));
- if (r1 < r)
- r = r1;
- }
-
return (r);
}
@@ -651,6 +648,7 @@ archive_read_finish(struct archive *_a)
if (a->entry)
archive_entry_free(a->entry);
a->archive.magic = 0;
+ free(a->buffer);
free(a);
#if ARCHIVE_API_VERSION > 1
return (r);
@@ -700,40 +698,350 @@ __archive_read_register_format(struct ar
* Used internally by decompression routines to register their bid and
* initialization functions.
*/
-struct decompressor_t *
-__archive_read_register_compression(struct archive_read *a,
- int (*bid)(const void *, size_t),
- int (*init)(struct archive_read *, const void *, size_t))
+struct archive_reader *
+__archive_read_get_reader(struct archive_read *a)
{
int i, number_slots;
__archive_check_magic(&a->archive,
ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
- "__archive_read_register_compression");
+ "__archive_read_get_reader");
- number_slots = sizeof(a->decompressors) / sizeof(a->decompressors[0]);
+ number_slots = sizeof(a->readers) / sizeof(a->readers[0]);
for (i = 0; i < number_slots; i++) {
- if (a->decompressors[i].bid == bid)
- return (a->decompressors + i);
- if (a->decompressors[i].bid == NULL) {
- a->decompressors[i].bid = bid;
- a->decompressors[i].init = init;
- return (a->decompressors + i);
- }
+ if (a->readers[i].bid == NULL)
+ return (a->readers + i);
}
__archive_errx(1, "Not enough slots for compression registration");
return (NULL); /* Never actually executed. */
}
-/* used internally to simplify read-ahead */
+/*
+ * The next three functions comprise the peek/consume internal I/O
+ * system used by archive format readers. This system allows fairly
+ * flexible read-ahead and allows the I/O code to operate in a
+ * zero-copy manner most of the time.
+ *
+ * In the ideal case, block providers give the I/O code blocks of data
+ * and __archive_read_ahead() just returns pointers directly into
+ * those blocks. Then __archive_read_consume() just bumps those
+ * pointers. Only if your request would span blocks does the I/O
+ * layer use a copy buffer to provide you with a contiguous block of
+ * data. The __archive_read_skip() is an optimization; it scans ahead
+ * very quickly (it usually translates into a seek() operation if
+ * you're reading uncompressed disk files).
+ *
+ * A couple of useful idioms:
+ * * "I just want some data." Ask for 1 byte and pay attention to
+ * the "number of bytes available" from __archive_read_ahead().
+ * You can consume more than you asked for; you just can't consume
+ * more than is available right now. If you consume everything that's
+ * immediately available, the next read_ahead() call will pull
+ * the next block.
+ * * "I want to output a large block of data." As above, ask for 1 byte,
+ * emit all that's available (up to whatever limit you have), then
+ * repeat until you're done.
+ * * "I want to peek ahead by a large amount." Ask for 4k or so, then
+ * double and repeat until you get an error or have enough. Note
+ * that the I/O layer will likely end up expanding its copy buffer
+ * to fit your request, so use this technique cautiously. This
+ * technique is used, for example, by some of the format tasting
+ * code that has uncertain look-ahead needs.
+ *
+ * TODO: Someday, provide a more generic __archive_read_seek() for
+ * those cases where it's useful. This is tricky because there are lots
+ * of cases where seek() is not available (reading gzip data from a
+ * network socket, for instance), so there needs to be a good way to
+ * communicate whether seek() is available and users of that interface
+ * need to use non-seeking strategies whenever seek() is not available.
+ */
+
+/*
+ * Looks ahead in the input stream:
+ * * If 'avail' pointer is provided, that returns number of bytes available
+ * in the current buffer, which may be much larger than requested.
+ * * If end-of-file, *avail gets set to zero.
+ * * If error, *avail gets error code.
+ * * If request can be met, returns pointer to data, returns NULL
+ * if request is not met.
+ *
+ * Note: If you just want "some data", ask for 1 byte and pay attention
+ * to *avail, which will have the actual amount available. If you
+ * know exactly how many bytes you need, just ask for that and treat
+ * a NULL return as an error.
+ *
+ * Important: This does NOT move the file pointer. See
+ * __archive_read_consume() below.
+ */
+
+/*
+ * This is tricky. We need to provide our clients with pointers to
+ * contiguous blocks of memory but we want to avoid copying whenever
+ * possible.
+ *
+ * Mostly, this code returns pointers directly into the block of data
+ * provided by the client_read routine. It can do this unless the
+ * request would split across blocks. In that case, we have to copy
+ * into an internal buffer to combine reads.
+ */
const void *
-__archive_read_ahead(struct archive_read *a, size_t len)
+__archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail)
{
- const void *h;
+ ssize_t bytes_read;
+ size_t tocopy;
- if ((a->decompressor->read_ahead)(a, &h, len) < (ssize_t)len)
+ if (a->fatal) {
+ if (avail)
+ *avail = ARCHIVE_FATAL;
return (NULL);
- return (h);
+ }
+
+ /*
+ * Keep pulling more data until we can satisfy the request.
+ */
+ for (;;) {
+
+ /*
+ * If we can satisfy from the copy buffer, we're done.
+ */
+ if (a->avail >= min) {
+ if (avail != NULL)
+ *avail = a->avail;
+ return (a->next);
+ }
+
+ /*
+ * We can satisfy directly from client buffer if everything
+ * currently in the copy buffer is still in the client buffer.
+ */
+ if (a->client_total >= a->client_avail + a->avail
+ && a->client_avail + a->avail >= min) {
+ /* "Roll back" to client buffer. */
+ a->client_avail += a->avail;
+ a->client_next -= a->avail;
+ /* Copy buffer is now empty. */
+ a->avail = 0;
+ a->next = a->buffer;
+ /* Return data from client buffer. */
+ if (avail != NULL)
+ *avail = a->client_avail;
+ return (a->client_next);
+ }
+
+ /* Move data forward in copy buffer if necessary. */
+ if (a->next > a->buffer &&
+ a->next + min > a->buffer + a->buffer_size) {
+ if (a->avail > 0)
+ memmove(a->buffer, a->next, a->avail);
+ a->next = a->buffer;
+ }
+
+ /* If we've used up the client data, get more. */
+ if (a->client_avail <= 0) {
+ if (a->end_of_file) {
+ if (avail != NULL)
+ *avail = 0;
+ return (NULL);
+ }
+ bytes_read = (a->source->read)(a->source,
+ &a->client_buff);
+ if (bytes_read < 0) { /* Read error. */
+ a->client_total = a->client_avail = 0;
+ a->client_next = a->client_buff = NULL;
+ a->fatal = 1;
+ if (avail != NULL)
+ *avail = ARCHIVE_FATAL;
+ return (NULL);
+ }
+ if (bytes_read == 0) { /* Premature end-of-file. */
+ a->client_total = a->client_avail = 0;
+ a->client_next = a->client_buff = NULL;
+ a->end_of_file = 1;
+ /* Return whatever we do have. */
+ if (avail != NULL)
+ *avail = a->avail;
+ return (NULL);
+ }
+ a->archive.raw_position += bytes_read;
+ a->client_total = bytes_read;
+ a->client_avail = a->client_total;
+ a->client_next = a->client_buff;
+ }
+ else
+ {
+ /*
+ * We can't satisfy the request from the copy
+ * buffer or the existing client data, so we
+ * need to copy more client data over to the
+ * copy buffer.
+ */
+
+ /* Ensure the buffer is big enough. */
+ if (min > a->buffer_size) {
+ size_t s, t;
+ char *p;
+
+ /* Double the buffer; watch for overflow. */
+ s = t = a->buffer_size;
+ while (s < min) {
+ t *= 2;
+ if (t <= s) { /* Integer overflow! */
+ archive_set_error(&a->archive,
+ ENOMEM,
+ "Unable to allocate copy buffer");
+ a->fatal = 1;
+ if (avail != NULL)
+ *avail = ARCHIVE_FATAL;
+ return (NULL);
+ }
+ s = t;
+ }
+ /* Now s >= min, so allocate a new buffer. */
+ p = (char *)malloc(s);
+ if (p == NULL) {
+ archive_set_error(&a->archive, ENOMEM,
+ "Unable to allocate copy buffer");
+ a->fatal = 1;
+ if (avail != NULL)
+ *avail = ARCHIVE_FATAL;
+ return (NULL);
+ }
+ /* Move data into newly-enlarged buffer. */
+ if (a->avail > 0)
+ memmove(p, a->next, a->avail);
+ free(a->buffer);
+ a->next = a->buffer = p;
+ a->buffer_size = s;
+ }
+
+ /* We can add client data to copy buffer. */
+ /* First estimate: copy to fill rest of buffer. */
+ tocopy = (a->buffer + a->buffer_size)
+ - (a->next + a->avail);
+ /* Don't waste time buffering more than we need to. */
+ if (tocopy + a->avail > min)
+ tocopy = min - a->avail;
+ /* Don't copy more than is available. */
+ if (tocopy > a->client_avail)
+ tocopy = a->client_avail;
+
+ memcpy(a->next + a->avail, a->client_next,
+ tocopy);
+ /* Remove this data from client buffer. */
+ a->client_next += tocopy;
+ a->client_avail -= tocopy;
+ /* add it to copy buffer. */
+ a->avail += tocopy;
+ }
+ }
+}
+
+/*
+ * Move the file pointer forward. This should be called after
+ * __archive_read_ahead() returns data to you. Don't try to move
+ * ahead by more than the amount of data available according to
+ * __archive_read_ahead().
+ */
+/*
+ * Mark the appropriate data as used. Note that the request here will
+ * often be much smaller than the size of the previous read_ahead
+ * request.
+ */
+ssize_t
+__archive_read_consume(struct archive_read *a, size_t request)
+{
+ if (a->avail > 0) {
+ /* Read came from copy buffer. */
+ a->next += request;
+ a->avail -= request;
+ } else {
+ /* Read came from client buffer. */
+ a->client_next += request;
+ a->client_avail -= request;
+ }
+ a->archive.file_position += request;
+ return (request);
+}
+
+/*
+ * Move the file pointer ahead by an arbitrary amount. If you're
+ * reading uncompressed data from a disk file, this will actually
+ * translate into a seek() operation. Even in cases where seek()
+ * isn't feasible, this at least pushes the read-and-discard loop
+ * down closer to the data source.
+ */
+int64_t
+__archive_read_skip(struct archive_read *a, int64_t request)
+{
+ off_t bytes_skipped, total_bytes_skipped = 0;
+ size_t min;
+
+ if (a->fatal)
+ return (-1);
+ /*
+ * If there is data in the buffers already, use that first.
+ */
+ if (a->avail > 0) {
+ min = minimum(request, (off_t)a->avail);
+ bytes_skipped = __archive_read_consume(a, min);
+ request -= bytes_skipped;
+ total_bytes_skipped += bytes_skipped;
+ }
+ if (a->client_avail > 0) {
+ min = minimum(request, (off_t)a->client_avail);
+ bytes_skipped = __archive_read_consume(a, min);
+ request -= bytes_skipped;
+ total_bytes_skipped += bytes_skipped;
+ }
+ if (request == 0)
+ return (total_bytes_skipped);
+ /*
+ * If a client_skipper was provided, try that first.
+ */
+#if ARCHIVE_API_VERSION < 2
+ if ((a->source->skip != NULL) && (request < SSIZE_MAX)) {
+#else
+ if (a->source->skip != NULL) {
+#endif
+ bytes_skipped = (a->source->skip)(a->source, request);
+ if (bytes_skipped < 0) { /* error */
+ a->client_total = a->client_avail = 0;
+ a->client_next = a->client_buff = NULL;
+ a->fatal = 1;
+ return (bytes_skipped);
+ }
+ total_bytes_skipped += bytes_skipped;
+ a->archive.file_position += bytes_skipped;
+ request -= bytes_skipped;
+ a->client_next = a->client_buff;
+ a->archive.raw_position += bytes_skipped;
+ a->client_avail = a->client_total = 0;
+ }
+ /*
+ * Note that client_skipper will usually not satisfy the
+ * full request (due to low-level blocking concerns),
+ * so even if client_skipper is provided, we may still
+ * have to use ordinary reads to finish out the request.
+ */
+ while (request > 0) {
+ const void* dummy_buffer;
+ ssize_t bytes_read;
+ dummy_buffer = __archive_read_ahead(a, 1, &bytes_read);
+ if (bytes_read < 0)
+ return (bytes_read);
+ if (bytes_read == 0) {
+ /* We hit EOF before we satisfied the skip request. */
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+ "Truncated input file (need to skip %jd bytes)",
+ (intmax_t)request);
+ return (ARCHIVE_FATAL);
+ }
+ min = (size_t)(minimum(bytes_read, request));
+ bytes_read = __archive_read_consume(a, min);
+ total_bytes_skipped += bytes_read;
+ request -= bytes_read;
+ }
+ return (total_bytes_skipped);
}
Modified: head/lib/libarchive/archive_read_private.h
==============================================================================
--- head/lib/libarchive/archive_read_private.h Sat Dec 6 06:23:37 2008 (r185678)
+++ head/lib/libarchive/archive_read_private.h Sat Dec 6 06:45:15 2008 (r185679)
@@ -32,6 +32,75 @@
#include "archive_string.h"
#include "archive_private.h"
+struct archive_read;
+struct archive_reader;
+struct archive_read_source;
+
+/*
+ * A "reader" knows how to provide blocks. That can include something
+ * that reads blocks from disk or socket or a transformation layer
+ * that reads blocks from another source and transforms them. This
+ * includes decompression and decryption filters.
+ *
+ * How bidding works:
+ * * The bid manager reads the first block from the current source.
+ * * It shows that block to each registered bidder.
+ * * The winning bidder is initialized (with the block and information
+ * about the source)
+ * * The winning bidder becomes the new source and the process repeats
+ * This ends only when no reader provides a non-zero bid.
+ */
+struct archive_reader {
+ /* Configuration data for the reader. */
+ void *data;
+ /* Bidder is handed the initial block from its source. */
+ int (*bid)(struct archive_reader *, const void *buff, size_t);
+ /* Init() is given the archive, upstream source, and the initial
+ * block above. It returns a populated source structure. */
+ struct archive_read_source *(*init)(struct archive_read *,
+ struct archive_reader *, struct archive_read_source *source,
+ const void *, size_t);
+ /* Release the reader and any configuration data it allocated. */
+ int (*free)(struct archive_reader *);
+};
+
+/*
+ * A "source" is an instance of a reader. This structure is
+ * allocated and initialized by the init() method of a reader
+ * above.
+ */
+struct archive_read_source {
+ /* Essentially all sources will need these values, so
+ * just declare them here. */
+ struct archive_reader *reader; /* Reader that I'm an instance of. */
+ struct archive_read_source *upstream; /* Who I get blocks from. */
+ struct archive_read *archive; /* associated archive. */
+ /* Return next block. */
+ ssize_t (*read)(struct archive_read_source *, const void **);
+ /* Skip forward this many bytes. */
+ int64_t (*skip)(struct archive_read_source *self, int64_t request);
+ /* Close (recursively) and free(self). */
+ int (*close)(struct archive_read_source *self);
+ /* My private data. */
+ void *data;
+};
+
+/*
+ * The client source is almost the same as an internal source.
+ *
+ * TODO: Make archive_read_source and archive_read_client identical so
+ * that users of the library can easily register their own
+ * transformation filters. This will probably break the API/ABI and
+ * so should be deferred until libarchive 3.0.
+ */
+struct archive_read_client {
+ archive_open_callback *opener;
+ archive_read_callback *reader;
+ archive_skip_callback *skipper;
+ archive_close_callback *closer;
+ void *data;
+};
+
struct archive_read {
struct archive archive;
@@ -50,46 +119,30 @@ struct archive_read {
off_t read_data_output_offset;
size_t read_data_remaining;
- /* Callbacks to open/read/write/close archive stream. */
- archive_open_callback *client_opener;
- archive_read_callback *client_reader;
- archive_skip_callback *client_skipper;
- archive_close_callback *client_closer;
- void *client_data;
+ /* Callbacks to open/read/write/close client archive stream. */
+ struct archive_read_client client;
+
+ /* Registered readers. */
+ struct archive_reader readers[8];
+
+ /* Source */
+ struct archive_read_source *source;
/* File offset of beginning of most recently-read header. */
off_t header_position;
- /*
- * Decompressors have a very specific lifecycle:
- * public setup function initializes a slot in this table
- * 'config' holds minimal configuration data
- * bid() examines a block of data and returns a bid [1]
- * init() is called for successful bidder
- * 'data' is initialized by init()
- * read() returns a pointer to the next block of data
- * consume() indicates how much data is used
- * skip() ignores bytes of data
- * finish() cleans up and frees 'data' and 'config'
- *
- * [1] General guideline: bid the number of bits that you actually
- * test, e.g., 16 if you test a 2-byte magic value.
- */
- struct decompressor_t {
- void *config;
- void *data;
- int (*bid)(const void *buff, size_t);
- int (*init)(struct archive_read *,
- const void *buff, size_t);
- int (*finish)(struct archive_read *);
- ssize_t (*read_ahead)(struct archive_read *,
- const void **, size_t);
- ssize_t (*consume)(struct archive_read *, size_t);
- off_t (*skip)(struct archive_read *, off_t);
- } decompressors[4];
- /* Pointer to current decompressor. */
- struct decompressor_t *decompressor;
+ /* Used by reblocking logic. */
+ char *buffer;
+ size_t buffer_size;
+ char *next; /* Current read location. */
+ size_t avail; /* Bytes in my buffer. */
+ const void *client_buff; /* Client buffer information. */
+ size_t client_total;
+ const char *client_next;
+ size_t client_avail;
+ char end_of_file;
+ char fatal;
/*
* Format detection is mostly the same as compression
@@ -124,12 +177,13 @@ int __archive_read_register_format(struc
int (*read_data_skip)(struct archive_read *),
int (*cleanup)(struct archive_read *));
-struct decompressor_t
- *__archive_read_register_compression(struct archive_read *a,
- int (*bid)(const void *, size_t),
- int (*init)(struct archive_read *, const void *, size_t));
+struct archive_reader
+ *__archive_read_get_reader(struct archive_read *a);
const void
- *__archive_read_ahead(struct archive_read *, size_t);
-
+ *__archive_read_ahead(struct archive_read *, size_t, ssize_t *);
+ssize_t
+ __archive_read_consume(struct archive_read *, size_t);
+int64_t
+ __archive_read_skip(struct archive_read *, int64_t);
#endif
Modified: head/lib/libarchive/archive_read_support_compression_all.c
==============================================================================
--- head/lib/libarchive/archive_read_support_compression_all.c Sat Dec 6 06:23:37 2008 (r185678)
+++ head/lib/libarchive/archive_read_support_compression_all.c Sat Dec 6 06:45:15 2008 (r185679)
@@ -39,5 +39,11 @@ archive_read_support_compression_all(str
#if HAVE_ZLIB_H
archive_read_support_compression_gzip(a);
#endif
+#if HAVE_LZMADEC_H
+ /* LZMA bidding is subject to false positives because
+ * the LZMA file format has a very weak signature. It
+ * may not be feasible to include LZMA detection here. */
+ /* archive_read_support_compression_lzma(a); */
+#endif
return (ARCHIVE_OK);
}
Modified: head/lib/libarchive/archive_read_support_compression_bzip2.c
==============================================================================
--- head/lib/libarchive/archive_read_support_compression_bzip2.c Sat Dec 6 06:23:37 2008 (r185678)
+++ head/lib/libarchive/archive_read_support_compression_bzip2.c Sat Dec 6 06:45:15 2008 (r185679)
@@ -51,30 +51,49 @@ __FBSDID("$FreeBSD$");
#if HAVE_BZLIB_H
struct private_data {
bz_stream stream;
- char *uncompressed_buffer;
- size_t uncompressed_buffer_size;
- char *read_next;
- int64_t total_out;
+ char *out_block;
+ size_t out_block_size;
+ char valid; /* True = decompressor is initialized */
char eof; /* True = found end of compressed data. */
};
-static int finish(struct archive_read *);
-static ssize_t read_ahead(struct archive_read *, const void **, size_t);
-static ssize_t read_consume(struct archive_read *, size_t);
-static int drive_decompressor(struct archive_read *a, struct private_data *);
+/* Bzip2 source */
+static ssize_t bzip2_source_read(struct archive_read_source *, const void **);
+static int bzip2_source_close(struct archive_read_source *);
#endif
-/* These two functions are defined even if we lack the library. See below. */
-static int bid(const void *, size_t);
-static int init(struct archive_read *, const void *, size_t);
+/*
+ * Note that we can detect bzip2 archives even if we can't decompress
+ * them. (In fact, we like detecting them because we can give better
+ * error messages.) So the bid framework here gets compiled even
+ * if bzlib is unavailable.
+ */
+static int bzip2_reader_bid(struct archive_reader *, const void *, size_t);
+static struct archive_read_source *bzip2_reader_init(struct archive_read *,
+ struct archive_reader *, struct archive_read_source *,
+ const void *, size_t);
+static int bzip2_reader_free(struct archive_reader *);
int
archive_read_support_compression_bzip2(struct archive *_a)
{
struct archive_read *a = (struct archive_read *)_a;
- if (__archive_read_register_compression(a, bid, init) != NULL)
- return (ARCHIVE_OK);
- return (ARCHIVE_FATAL);
+ struct archive_reader *reader = __archive_read_get_reader(a);
+
+ if (reader == NULL)
+ return (ARCHIVE_FATAL);
+
+ reader->data = NULL;
+ reader->bid = bzip2_reader_bid;
+ reader->init = bzip2_reader_init;
+ reader->free = bzip2_reader_free;
+ return (ARCHIVE_OK);
+}
+
+static int
+bzip2_reader_free(struct archive_reader *self){
+ (void)self; /* UNUSED */
+ return (ARCHIVE_OK);
}
/*
@@ -85,11 +104,13 @@ archive_read_support_compression_bzip2(s
* from verifying as much as we would like.
*/
static int
-bid(const void *buff, size_t len)
+bzip2_reader_bid(struct archive_reader *self, const void *buff, size_t len)
{
const unsigned char *buffer;
int bits_checked;
+ (void)self; /* UNUSED */
+
if (len < 1)
return (0);
@@ -150,16 +171,19 @@ bid(const void *buff, size_t len)
* decompression. We can, however, still detect compressed archives
* and emit a useful message.
*/
-static int
-init(struct archive_read *a, const void *buff, size_t n)
+static struct archive_read_source *
+bzip2_reader_init(struct archive_read *a, struct archive_reader *reader,
+ struct archive_read_source *upstream, const void *buff, size_t n)
{
(void)a; /* UNUSED */
+ (void)reader; /* UNUSED */
+ (void)upstream; /* UNUSED */
(void)buff; /* UNUSED */
(void)n; /* UNUSED */
archive_set_error(&a->archive, -1,
"This version of libarchive was compiled without bzip2 support");
- return (ARCHIVE_FATAL);
+ return (NULL);
}
@@ -168,37 +192,42 @@ init(struct archive_read *a, const void
/*
* Setup the callbacks.
*/
-static int
-init(struct archive_read *a, const void *buff, size_t n)
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-head
mailing list