bin/125350: [patch] src/lib/libfetch add support for deflate and
gzip encoded http downloads
Dominic Fandrey
kamikaze at bsdforen.de
Sun Jul 6 23:20:01 UTC 2008
>Number: 125350
>Category: bin
>Synopsis: [patch] src/lib/libfetch add support for deflate and gzip encoded http downloads
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: change-request
>Submitter-Id: current-users
>Arrival-Date: Sun Jul 06 23:20:00 UTC 2008
>Closed-Date:
>Last-Modified:
>Originator: Dominic Fandrey
>Release: RELENG_7
>Organization:
private
>Environment:
FreeBSD mobileKamikaze.norad 7.0-STABLE FreeBSD 7.0-STABLE #1: Sat Jun 7 14:00:26 CEST 2008 root at mobileKamikaze.norad:/usr/obj/HP6510b/amd64/usr/src/sys/HP6510b amd64
>Description:
The patch adds support for gzip and deflate compression of HTTP downloads to libfetch.
This is work in progress. Things yet to be done in order of preference:
1. Add support for compress encoding.
2. Implement random access layer (not required/useful for libfetch).
3. Clean up http.c (a seemingly monumental task).
>How-To-Repeat:
>Fix:
Patch attached with submission follows:
diff -Pur src/lib/libfetch.orig/Makefile src/lib/libfetch/Makefile
--- src/lib/libfetch.orig/Makefile 2008-07-07 00:56:11.000000000 +0200
+++ src/lib/libfetch/Makefile 2008-07-07 00:56:36.000000000 +0200
@@ -4,7 +4,7 @@
LIB= fetch
CFLAGS+= -I.
-SRCS= fetch.c common.c ftp.c http.c file.c \
+SRCS= fetch.c common.c ftp.c http.c httpdecode.c file.c \
ftperr.h httperr.h
INCS= fetch.h
MAN= fetch.3
@@ -20,6 +20,8 @@
LDADD= -lssl -lcrypto
.endif
+LDADD+= -lz
+
CFLAGS+= -DFTP_COMBINE_CWDS
CSTD?= c99
diff -Pur src/lib/libfetch.orig/http.c src/lib/libfetch/http.c
--- src/lib/libfetch.orig/http.c 2008-07-07 00:56:11.000000000 +0200
+++ src/lib/libfetch/http.c 2008-07-07 00:59:01.000000000 +0200
@@ -82,6 +82,7 @@
#include "fetch.h"
#include "common.h"
#include "httperr.h"
+#include "httpdecode.h"
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5
@@ -336,6 +337,7 @@
hdr_error = -1,
hdr_end = 0,
hdr_unknown = 1,
+ hdr_content_encoding,
hdr_content_length,
hdr_content_range,
hdr_last_modified,
@@ -349,6 +351,7 @@
hdr_t num;
const char *name;
} hdr_names[] = {
+ { hdr_content_encoding, "Content-Encoding" },
{ hdr_content_length, "Content-Length" },
{ hdr_content_range, "Content-Range" },
{ hdr_last_modified, "Last-Modified" },
@@ -496,6 +499,21 @@
}
/*
+ * Parse a content-encoding header
+ */
+/*
+ * The value is matched against the content-coding names of RFC 2616
+ * section 3.5.  Coding names are case-insensitive, so the comparison
+ * ignores ASCII case, and the historic "x-gzip" and "x-compress"
+ * aliases are accepted as well.  Blanks around the name are ignored.
+ *
+ * Returns one of the ENCODING_* constants.  Any value that is not
+ * recognised (this includes "identity" and lists of several codings)
+ * is reported as ENCODING_RAW.
+ *
+ * NOTE(review): because of that fallback the caller cannot tell an
+ * unsupported coding from an unencoded body.
+ */
+static int
+http_parse_encoding(const char *p)
+{
+	static const struct {
+		const char	*name;		/* lower-case coding name */
+		int		 encoding;	/* matching ENCODING_* value */
+	} codings[] = {
+		{ "gzip",	ENCODING_GZIP },
+		{ "x-gzip",	ENCODING_GZIP },
+		{ "deflate",	ENCODING_DEFLATE },
+		{ "compress",	ENCODING_COMPRESS },
+		{ "x-compress",	ENCODING_COMPRESS },
+	};
+	const char *name, *q;
+	size_t i;
+	char c;
+
+	/* skip leading blanks */
+	while (*p == ' ' || *p == '\t')
+		p++;
+
+	for (i = 0; i < sizeof(codings) / sizeof(codings[0]); i++) {
+		/* the table is lower-case, so only the input is folded */
+		name = codings[i].name;
+		for (q = p; *name != '\0'; name++, q++) {
+			c = *q;
+			if (c >= 'A' && c <= 'Z')
+				c += 'a' - 'A';
+			if (c != *name)
+				break;
+		}
+		if (*name != '\0')
+			continue;
+
+		/* the name must be the whole value, bar trailing blanks */
+		while (*q == ' ' || *q == '\t' || *q == '\r' || *q == '\n')
+			q++;
+		if (*q == '\0')
+			return (codings[i].encoding);
+	}
+	return (ENCODING_RAW);
+}
+
+/*
* Parse a content-length header
*/
static int
@@ -800,14 +818,17 @@
conn_t *conn;
struct url *url, *new;
int chunked, direct, need_auth, noredirect, verbose;
- int e, i, n, val;
+ int e, i, n, val, encoding;
off_t offset, clength, length, size;
time_t mtime;
const char *p;
- FILE *f;
+ FILE *f, *d;
hdr_t h;
char hbuf[MAXHOSTNAMELEN + 7], *host;
+ f = NULL;
+ d = NULL;
+
direct = CHECK_FLAG('d');
noredirect = CHECK_FLAG('A');
verbose = CHECK_FLAG('v');
@@ -834,6 +855,7 @@
length = -1;
size = -1;
mtime = 0;
+ encoding = ENCODING_RAW;
/* check port */
if (!url->port)
@@ -919,6 +941,7 @@
http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname());
if (url->offset > 0)
http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset);
+ http_cmd(conn, "Accept-Encoding: gzip,deflate");
http_cmd(conn, "Connection: close");
http_cmd(conn, "");
@@ -999,6 +1022,9 @@
case hdr_error:
http_seterr(HTTP_PROTOCOL_ERROR);
goto ouch;
+ case hdr_content_encoding:
+ encoding = http_parse_encoding(p);
+ break;
case hdr_content_length:
http_parse_length(p, &clength);
break;
@@ -1119,7 +1145,9 @@
/* fill in stats */
if (us) {
- us->size = size;
+ /* we can only predict the size of unencoded streams */
+ if (encoding == ENCODING_RAW)
+ us->size = size;
us->atime = us->mtime = mtime;
}
@@ -1139,6 +1167,13 @@
goto ouch;
}
+ /* wrap the decoder around it */
+ if ((d = httpDecode(f, encoding, size)) == NULL) {
+ fetch_syserr();
+ fclose(f);
+ goto ouch;
+ }
+
if (url != URL)
fetchFreeURL(url);
if (purl)
@@ -1150,7 +1185,7 @@
f = NULL;
}
- return (f);
+ return (d);
ouch:
if (url != URL)
diff -Pur src/lib/libfetch.orig/httpdecode.c src/lib/libfetch/httpdecode.c
--- src/lib/libfetch.orig/httpdecode.c 1970-01-01 01:00:00.000000000 +0100
+++ src/lib/libfetch/httpdecode.c 2008-07-07 00:56:36.000000000 +0200
@@ -0,0 +1,411 @@
+/*
+ * I wrote this and I say you can do whatever you want with it. Period.
+ * However, I'd love to hear from you what you've done.
+ *
+ * Dominic Fandrey <kamikaze at bsdforen.de>
+ */
+
+/**
+ * \file httpdecode.c
+ *
+ * This file contains the implementation of the prototypes defined in
+ * httpdecode.h.
+ *
+ * @brief
+ * HTTP content decoding implementation.
+ * @see
+ * httpdecode.h
+ * @author
+ * Dominic Fandrey <kamikaze at bsdforen.de>
+ * @version
+ * 0.1.99.2008.07.07
+ */
+
+/* LINTLIBRARY */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <zlib.h>
+#include "httpdecode.h"
+
+/* PRIVATE STRUCTS */
+
+/**
+ * An instance of this struct is the cookie behind the FILE handle
+ * created by zlibOpen; zlibRead and zlibClose receive it back.
+ *
+ * @brief
+ *	State of one zlib backed decoding stream.
+ */
+struct zlibStream {
+	/** @brief The stream the encoded data is read from. */
+	FILE *source;
+
+	/** @brief Holding area for encoded bytes read from source. */
+	char *buffer;
+
+	/**
+	 * zlibOpen accepts ENCODING_GZIP and ENCODING_DEFLATE here and
+	 * rejects every other value.
+	 *
+	 * @brief
+	 *	The encoding of the source data.
+	 */
+	int encoding;
+
+	/** @brief The inflate state maintained by zlib. */
+	z_stream stream;
+
+	/**
+	 * zlibRead uses this as an upper bound for the number of bytes
+	 * it takes from source, so that nothing behind the encoded data
+	 * is consumed.  httpDecode documents the value 0 as "length
+	 * unknown".
+	 *
+	 * @brief
+	 *	The length of the encoded source stream.
+	 */
+	size_t length;
+
+	/** @brief The number of encoded bytes read from source so far. */
+	size_t read;
+
+	/** @brief The capacity of buffer in bytes. */
+	size_t bufferSize;
+
+	/** @brief The number of bytes in buffer that await decoding. */
+	size_t bufferUsed;
+};
+
+/* PRIVATE PROTOTYPES */
+
+/*
+ * These helpers are only used inside this file.  They are declared
+ * static so that their rather generic names do not become exported
+ * symbols of the library.  A later definition without a storage class
+ * inherits the internal linkage from the declaration given here.
+ */
+static void moveBuffer(struct zlibStream * cookie, char * newBuffer,
+    size_t size);
+static FILE * zlibOpen(struct zlibStream * cookie);
+static size_t zlibRead(struct zlibStream * cookie, char * buffer,
+    size_t length);
+static int zlibClose(struct zlibStream * cookie);
+/* TODO
+FILE * compressOpen(struct zlibStream * cookie);
+size_t compressRead(struct zlibStream * cookie, char * buffer, size_t length);
+int compressClose(struct zlibStream * cookie);
+FILE * randomOpen(struct zlibStream * cookie);
+size_t randomRead(struct zlibStream * cookie, char * buffer, size_t length);
+int randomSeek(struct zlibStream * cookie, off_t offset, int whence);
+int randomClose(struct zlibStream * cookie);
+*/
+
+/* PUBLIC FUNCTIONS */
+
+/**
+ * Opens a given stream for decoding and returns a FILE handle that can be
+ * used with the fread and fclose functions. Internally funopen is used
+ * to achieve this.
+ *
+ * For ENCODING_RAW nothing needs decoding and source itself is returned.
+ *
+ * In case of failure NULL is returned and errno is set: EINVAL for a
+ * NULL source or an encoding that cannot be decoded (this includes
+ * ENCODING_COMPRESS, which is not implemented), ENOMEM for insufficient
+ * memory. Nothing allocated by this function survives a failure, and
+ * the failure paths neither read from nor close source.
+ *
+ * @brief
+ *	Open a FILE stream to read decoded data from.
+ * @param source
+ *	The stream to read the encoded data from.
+ * @param encoding
+ *	The encoding type of the source stream, one of the ENCODING_*
+ *	values.
+ * @param length
+ *	The length of the source stream. Use 0 if unknown.
+ * @return
+ *	A FILE handle to read the decoded stream from, or NULL in case
+ *	of failure.
+ * @see
+ *	funopen(3)
+ * @see
+ *	fread(3)
+ * @see
+ *	fclose(3)
+ */
+FILE * httpDecode(FILE * source, int encoding, size_t length) {
+	struct zlibStream * zlibCookie;
+	FILE * decoded;
+	int savedErrno;
+
+	if (source == NULL) {
+		errno = EINVAL;
+		return(NULL);
+	}
+
+	switch (encoding) {
+	case ENCODING_RAW:
+		return(source);
+	case ENCODING_GZIP: case ENCODING_DEFLATE:
+		zlibCookie = malloc(sizeof(*zlibCookie));
+		if (zlibCookie == NULL) /* errno == ENOMEM */
+			return(NULL);
+		zlibCookie->buffer = NULL;
+		zlibCookie->bufferSize = 0;
+		zlibCookie->bufferUsed = 0;
+		zlibCookie->source = source;
+		zlibCookie->length = length;
+		zlibCookie->read = 0;
+		zlibCookie->encoding = encoding;
+
+		decoded = zlibOpen(zlibCookie);
+		if (decoded == NULL) {
+			/*
+			 * zlibOpen does not free the cookie on failure.
+			 * It was allocated here, so it is released here.
+			 * The errno value describing the failure is kept
+			 * across the call to free.
+			 */
+			savedErrno = errno;
+			free(zlibCookie);
+			errno = savedErrno;
+		}
+		return(decoded);
+	case ENCODING_COMPRESS: /* not implemented yet */
+	default:
+		errno = EINVAL;
+		return(NULL);
+	}
+}
+
+/**
+ * This function is a wrapper around httpDecode that allows random access
+ * by writing the stream into a temporary file. The file is buffered
+ * by a given number of buffers in memory.
+ * Buffers are overwritten in LRU order.
+ *
+ * @brief
+ * A file backed wrapper around httpDecode for random access.
+ * @param source
+ * The stream to read the encoded data from.
+ * @param encoding
+ * The encoding type of the source stream.
+ * @param length
+ * The length of the source stream. Use 0 if unknown.
+ * @param bufferSize
+ * The size of a buffer.
+ * @param buffers
+ * The number of buffers.
+ * @return
+ * Returns a FILE handle to read an encoded stream.
+ */
+/* TODO
+FILE * httpDecodeRandom(FILE * source, int encoding, size_t length,
+ size_t bufferSize, size_t buffers) {
+ return(source);
+}
+*/
+
+/* PRIVATE FUNCTIONS */
+
+/**
+ * Installs newBuffer as the read buffer of the cookie. The bufferUsed
+ * bytes held by the previous buffer are copied to the start of the new
+ * one, then the previous buffer is freed.
+ *
+ * Nothing is checked here: newBuffer must have room for at least the
+ * bufferUsed bytes currently held.
+ *
+ * @brief
+ *	Replace the current read buffer, keeping its contents.
+ * @param cookie
+ *	Contains all the data necessary to maintain the stream.
+ * @param newBuffer
+ *	The new buffer to use.
+ * @param size
+ *	The size of newBuffer; it is recorded as the new bufferSize.
+ */
+void moveBuffer(struct zlibStream * cookie, char * newBuffer, size_t size) {
+	char * oldBuffer = cookie->buffer;
+
+	cookie->bufferSize = size;
+	cookie->buffer = newBuffer;
+	memmove(newBuffer, oldBuffer, cookie->bufferUsed);
+	free(oldBuffer);
+}
+
+/**
+ * funopen expects a read function that takes a void pointer and int
+ * lengths. Calling zlibRead through a function pointer cast to that
+ * type is undefined behaviour, so the conversion is done here instead.
+ *
+ * @brief
+ *	Read callback handed to funopen, forwards to zlibRead.
+ * @param cookie
+ *	The struct zlibStream of the stream.
+ * @param buffer
+ *	The buffer to write the decoded data to.
+ * @param length
+ *	The space available in the buffer.
+ * @return
+ *	The value returned by zlibRead, with (size_t) -1 mapped to -1.
+ */
+static int zlibReadCallback(void * cookie, char * buffer, int length) {
+	size_t count;
+
+	if (length < 0) {
+		errno = EINVAL;
+		return(-1);
+	}
+	count = zlibRead(cookie, buffer, (size_t) length);
+	if (count == (size_t) -1)
+		return(-1);
+	/* zlibRead never returns more than length, so this fits an int. */
+	return((int) count);
+}
+
+/**
+ * @brief
+ *	Close callback handed to funopen, forwards to zlibClose.
+ * @param cookie
+ *	The struct zlibStream of the stream.
+ * @return
+ *	The value returned by zlibClose.
+ */
+static int zlibCloseCallback(void * cookie) {
+	return(zlibClose(cookie));
+}
+
+/**
+ * This function initializes a zlib stream and creates the file handle
+ * that will later be used to pull data from the stream.
+ *
+ * On success the cookie belongs to the returned FILE and is released by
+ * zlibClose when that FILE is closed.
+ *
+ * On failure NULL is returned and everything this function allocated
+ * (the read buffer and the zlib state) is released again. The cookie
+ * itself is not freed. errno is set to one of the following values:
+ *	EINVAL	This can either indicate that an unsupported encoding
+ *		was given or that this code and the used zlib
+ *		implementation are incompatible.
+ *	ENOMEM	Indicates that the available memory is insufficient for
+ *		the decode buffer or zlib.
+ * If funopen fails, the errno value it left behind is preserved.
+ *
+ * @brief
+ *	Open a zlib stream.
+ * @param cookie
+ *	Contains all the data necessary to maintain the stream.
+ * @return
+ *	A FILE* pointer or NULL in case of failure.
+ */
+FILE * zlibOpen(struct zlibStream * cookie) {
+	int wbits, zlibStatus, savedErrno;
+	FILE * decoded;
+	z_stream * stream = &(cookie->stream);
+
+	/* Set window bits for the selected encoding. */
+	switch(cookie->encoding) {
+	case ENCODING_DEFLATE:
+		/*
+		 * Negative window bits select raw deflate data.
+		 * NOTE(review): RFC 2616 section 3.5 defines the deflate
+		 * coding as zlib wrapped data, which raw mode does not
+		 * accept -- confirm against the servers to be supported.
+		 */
+		wbits = -MAX_WBITS;
+		break;
+	case ENCODING_GZIP:
+		/* Adding 16 makes zlib expect a gzip header and trailer. */
+		wbits = MAX_WBITS + 16;
+		break;
+	default:
+		errno = EINVAL;
+		return(NULL);
+	}
+
+	/* Create the decoding buffer. */
+	cookie->bufferSize = 32768; /* Zlib breaks with a smaller buffer. */
+	cookie->bufferUsed = 0;
+	cookie->buffer = malloc(cookie->bufferSize);
+	if (cookie->buffer == NULL)
+		return(NULL); /* errno == ENOMEM */
+
+	/* Initialize zlib stream data. */
+	stream->zalloc = Z_NULL;
+	stream->zfree = Z_NULL;
+	stream->opaque = Z_NULL;
+	stream->avail_in = 0;
+	stream->next_in = (Bytef *) cookie->buffer;
+
+	/*
+	 * Initialize stream for decoding. The status is examined directly;
+	 * errno is only written when there is an error to report.
+	 */
+	zlibStatus = inflateInit2(stream, wbits);
+	if (zlibStatus != Z_OK) {
+		free(cookie->buffer);
+		cookie->buffer = NULL;
+		/* Anything but a memory error is not supposed to happen. */
+		errno = (zlibStatus == Z_MEM_ERROR ? ENOMEM : EINVAL);
+		return(NULL);
+	}
+
+	/* Create the file stream to return. */
+	decoded = funopen(cookie, zlibReadCallback, NULL, NULL,
+	    zlibCloseCallback);
+	if (decoded == NULL) {
+		/* Undo the setup, but keep the errno value from funopen. */
+		savedErrno = errno;
+		inflateEnd(stream);
+		free(cookie->buffer);
+		cookie->buffer = NULL;
+		errno = savedErrno;
+	}
+	return(decoded);
+}
+
+/**
+ * Writes a chunk of decoded data to the given buffer.
+ *
+ * Encoded data is read from the source stream into the read buffer of
+ * the cookie, behind any bytes left over from an earlier call, and is
+ * inflated from there. If the cookie carries a length other than 0, no
+ * more than that many bytes are taken from the source in total; the
+ * length 0 means that the length is unknown and imposes no limit.
+ *
+ * In case of an error (size_t) -1 is returned. errno is set to EIO if
+ * the data is corrupt or ends before the encoded stream is complete,
+ * and to ENOMEM if zlib runs out of memory. If reading the source
+ * fails, errno is left as the failed read set it. When the input runs
+ * out after some data has already been decoded, that data is returned
+ * and the error is reported by the following call.
+ *
+ * An error does not cause the stream to be closed.
+ *
+ * @brief
+ *	Read decoded data from the encoded stream.
+ * @param cookie
+ *	Contains all the data necessary to maintain the stream.
+ * @param buffer
+ *	The buffer to write the decoded data to.
+ * @param length
+ *	The space available in the buffer.
+ * @return
+ *	The number of bytes written to the buffer, 0 once the end of the
+ *	encoded stream has been reached, or (size_t) -1 in case of
+ *	failure.
+ */
+size_t zlibRead(struct zlibStream * cookie, char * buffer, size_t length) {
+	char * tmpBuffer;
+	size_t growth, fresh, maxRead, bufferAvailable, flushed;
+	int zlibStatus, savedErrno;
+	z_stream * stream = &(cookie->stream);
+
+	/*
+	 * Adjust buffer size if the target buffer is larger than 2 times
+	 * the source buffer. This is optional: if no memory is available
+	 * the current buffer is kept and errno is restored.
+	 */
+	if ((length >> 1) > cookie->bufferSize) {
+		savedErrno = errno;
+		tmpBuffer = malloc(length >> 1);
+		if (tmpBuffer == NULL)
+			errno = savedErrno;
+		else
+			/* Move data from the old buffer to the new one. */
+			moveBuffer(cookie, tmpBuffer, length >> 1);
+	}
+
+	/* Run until the target buffer has been filled. */
+	flushed = 0;
+	while (length > 0) {
+		/* If the input buffer is not full, fill it. */
+		fresh = 0;
+		bufferAvailable = cookie->bufferSize - cookie->bufferUsed;
+		if (cookie->length != 0) {
+			/* Honour the limit of a source of known length. */
+			maxRead = (cookie->read < cookie->length
+			    ? cookie->length - cookie->read : 0);
+			if (maxRead < bufferAvailable)
+				bufferAvailable = maxRead;
+		}
+		if (bufferAvailable > 0) {
+			/*
+			 * Append behind the bytes that have not been
+			 * decoded yet instead of overwriting them.
+			 */
+			fresh = fread(cookie->buffer + cookie->bufferUsed,
+			    sizeof(char), bufferAvailable, cookie->source);
+			/* Forward errors. */
+			if (ferror(cookie->source))
+				return((size_t) -1);
+			cookie->bufferUsed += fresh;
+			cookie->read += fresh;
+		}
+
+		/* Decode data from the read to the target buffer. */
+		stream->avail_in = (uInt) cookie->bufferUsed;
+		stream->next_in = (Bytef *) cookie->buffer;
+		stream->avail_out = (uInt) length;
+		stream->next_out = (Bytef *) buffer;
+		zlibStatus = inflate(stream, Z_SYNC_FLUSH);
+
+		/* The amount of data just written to the target buffer. */
+		growth = length - stream->avail_out;
+
+		/* Move the input that was not consumed to the front. */
+		memmove(cookie->buffer, stream->next_in,
+		    (size_t) stream->avail_in);
+		cookie->bufferUsed = stream->avail_in;
+		stream->next_in = (Bytef *) cookie->buffer;
+
+		/* Adjust the target buffer. */
+		flushed += growth;
+		buffer += growth;
+		length = stream->avail_out;
+
+		/* Deal with the outcome. */
+		switch (zlibStatus) {
+		case Z_OK:
+			break;
+		case Z_STREAM_END:
+			return(flushed);
+		case Z_BUF_ERROR:
+			/*
+			 * No progress was possible. There was room in the
+			 * target buffer, so zlib is waiting for input.
+			 * Try again if this round delivered new input.
+			 * Otherwise the source is exhausted and the
+			 * encoded stream is incomplete; a larger read
+			 * buffer would not change that.
+			 */
+			if (fresh > 0)
+				break;
+			if (flushed > 0)
+				return(flushed);
+			errno = EIO;
+			return((size_t) -1);
+		case Z_MEM_ERROR:
+			errno = ENOMEM;
+			return((size_t) -1);
+		case Z_NEED_DICT: case Z_DATA_ERROR: case Z_STREAM_ERROR:
+		default:
+			errno = EIO;
+			return((size_t) -1);
+		}
+	}
+
+	return(flushed);
+}
+
+/**
+ * Closes the decoding stream, frees all buffers and closes the source
+ * stream the encoded data was read from.
+ *
+ * The source is closed here because the decoding stream is the only
+ * handle the caller of httpDecode keeps; without this the source stream
+ * would never be closed. After closing the decoding stream the source
+ * must not be used or closed again.
+ *
+ * @brief
+ *	Closes the decoding stream and its source.
+ * @param cookie
+ *	Contains all the data necessary to maintain the stream.
+ * @return
+ *	0 for success, -1 if closing the source stream failed. In the
+ *	latter case errno is left as fclose set it.
+ */
+int zlibClose(struct zlibStream * cookie) {
+	int status, savedErrno;
+
+	inflateEnd(&(cookie->stream));
+	status = (fclose(cookie->source) == 0 ? 0 : -1);
+
+	/* Keep the errno value of a failed fclose across the cleanup. */
+	savedErrno = errno;
+	free(cookie->buffer);
+	free(cookie);
+	errno = savedErrno;
+
+	return(status);
+}
+
diff -Pur src/lib/libfetch.orig/httpdecode.h src/lib/libfetch/httpdecode.h
--- src/lib/libfetch.orig/httpdecode.h 1970-01-01 01:00:00.000000000 +0100
+++ src/lib/libfetch/httpdecode.h 2008-07-07 00:56:36.000000000 +0200
@@ -0,0 +1,116 @@
+/*
+ * I wrote this and I say you can do whatever you want with it. Period.
+ * However, I'd love to hear from you what you've done.
+ *
+ * Dominic Fandrey <kamikaze at bsdforen.de>
+ */
+
+#ifndef HTTPDECODE_H
+#define HTTPDECODE_H
+
+/**
+ * \file httpdecode.h
+ *
+ * This file contains the public prototypes and defines required to read
+ * compressed data streams. Supported formats are those listed in
+ * RFC2616 section 3.5 (HTTP 1.1 content encodings). Compress decoding is
+ * not yet implemented.
+ *
+ * @brief
+ * Public defines and prototypes to decode encoded HTTP streams.
+ * @see
+ * http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.5
+ * @see
+ * httpdecode.c
+ * @author
+ * Dominic Fandrey <kamikaze at bsdforen.de>
+ * @version
+ * 0.3.99.2008.07.07
+ */
+
+/**
+ * httpDecode returns the source stream itself for this value.
+ *
+ * This can be used to abuse httpDecodeRandom (not implemented yet) as a
+ * random (read) access layer for any FILE stream.
+ *
+ * @brief
+ * The source stream is not encoded.
+ */
+#define ENCODING_RAW 0
+
+/**
+ * @brief
+ * The source stream is deflate encoded.
+ * @see
+ * zlib(3)
+ */
+#define ENCODING_DEFLATE 1
+
+/**
+ * @brief
+ * The source stream is gzip encoded.
+ * @see
+ * gzip(1)
+ * @see
+ * zlib(3)
+ */
+#define ENCODING_GZIP 2
+
+/**
+ * Decoding of this encoding is not implemented yet; httpDecode returns
+ * NULL for this value.
+ *
+ * @brief
+ * The source stream is compress encoded.
+ * @see
+ * compress(1)
+ */
+#define ENCODING_COMPRESS 3
+
+
+/*
+ * FILE and size_t are used by the prototype below. Including the header
+ * that declares them lets this file be included on its own.
+ */
+#include <stdio.h>
+
+/**
+ * Opens a given stream for decoding and returns a FILE handle that can be
+ * used with the fread and fclose functions. Internally funopen is used
+ * to achieve this.
+ *
+ * For ENCODING_RAW the source stream itself is returned.
+ *
+ * @brief
+ * Open a FILE stream to read decoded data from.
+ * @param source
+ * The stream to read the encoded data from.
+ * @param encoding
+ * The encoding type of the source stream, one of the ENCODING_* values.
+ * @param length
+ * The length of the source stream. Use 0 if unknown.
+ * @return
+ * Returns a FILE handle to read the decoded stream from, or NULL in
+ * case of failure.
+ * @see
+ * funopen(3)
+ * @see
+ * fread(3)
+ * @see
+ * fclose(3)
+ */
+FILE * httpDecode(FILE * source, int encoding, size_t length);
+
+/**
+ * This function is a wrapper around httpDecode that allows random access
+ * by writing the stream into a temporary file. The file is buffered
+ * by a given number of buffers in memory.
+ * Buffers are overwritten in LRU order.
+ *
+ * @brief
+ * A file backed wrapper around httpDecode for random access.
+ * @param source
+ * The stream to read the encoded data from.
+ * @param encoding
+ * The encoding type of the source stream.
+ * @param length
+ * The length of the source stream. Use 0 if unknown.
+ * @param bufferSize
+ * The size of a buffer.
+ * @param buffers
+ * The number of buffers.
+ * @return
+ * Returns a FILE handle to read an encoded stream.
+ */
+/* TODO
+FILE * httpDecodeRandom(FILE * source, int encoding, size_t length,
+ size_t bufferSize, size_t buffers);
+*/
+
+#endif /* HTTPDECODE_H */
+
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-bugs
mailing list