bin/125350: [patch] src/lib/libfetch add support for deflate and gzip encoded http downloads

Dominic Fandrey kamikaze at bsdforen.de
Sun Jul 6 23:20:01 UTC 2008


>Number:         125350
>Category:       bin
>Synopsis:       [patch] src/lib/libfetch add support for deflate and gzip encoded http downloads
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Sun Jul 06 23:20:00 UTC 2008
>Closed-Date:
>Last-Modified:
>Originator:     Dominic Fandrey
>Release:        RELENG_7
>Organization:
private
>Environment:
FreeBSD mobileKamikaze.norad 7.0-STABLE FreeBSD 7.0-STABLE #1: Sat Jun  7 14:00:26 CEST 2008     root at mobileKamikaze.norad:/usr/obj/HP6510b/amd64/usr/src/sys/HP6510b  amd64
>Description:
The patch adds support for gzip and deflate compression for HTTP downloads to libfetch.

This is work in progress. Things yet to be done in order of preference:
1. Add support for compress encoding.
2. Implement random access layer (not required/useful for libfetch).
3. Clean up http.c (a seemingly monumental task).
>How-To-Repeat:

>Fix:


Patch attached with submission follows:

diff -Pur src/lib/libfetch.orig/Makefile src/lib/libfetch/Makefile
--- src/lib/libfetch.orig/Makefile	2008-07-07 00:56:11.000000000 +0200
+++ src/lib/libfetch/Makefile	2008-07-07 00:56:36.000000000 +0200
@@ -4,7 +4,7 @@
 
 LIB=		fetch
 CFLAGS+=	-I.
-SRCS=		fetch.c common.c ftp.c http.c file.c \
+SRCS=		fetch.c common.c ftp.c http.c httpdecode.c file.c \
 		ftperr.h httperr.h
 INCS=		fetch.h
 MAN=		fetch.3
@@ -20,6 +20,8 @@
 LDADD=		-lssl -lcrypto
 .endif
 
+LDADD+=		-lz
+
 CFLAGS+=	-DFTP_COMBINE_CWDS
 
 CSTD?=		c99
diff -Pur src/lib/libfetch.orig/http.c src/lib/libfetch/http.c
--- src/lib/libfetch.orig/http.c	2008-07-07 00:56:11.000000000 +0200
+++ src/lib/libfetch/http.c	2008-07-07 00:59:01.000000000 +0200
@@ -82,6 +82,7 @@
 #include "fetch.h"
 #include "common.h"
 #include "httperr.h"
+#include "httpdecode.h"
 
 /* Maximum number of redirects to follow */
 #define MAX_REDIRECT 5
@@ -336,6 +337,7 @@
 	hdr_error = -1,
 	hdr_end = 0,
 	hdr_unknown = 1,
+	hdr_content_encoding,
 	hdr_content_length,
 	hdr_content_range,
 	hdr_last_modified,
@@ -349,6 +351,7 @@
 	hdr_t		 num;
 	const char	*name;
 } hdr_names[] = {
+	{ hdr_content_encoding,		"Content-Encoding" },
 	{ hdr_content_length,		"Content-Length" },
 	{ hdr_content_range,		"Content-Range" },
 	{ hdr_last_modified,		"Last-Modified" },
@@ -496,6 +499,21 @@
 }
 
 /*
+ * Parse a content-encoding header
+ */
+static int
+http_parse_encoding(const char *p)
+{
+	/*
+	 * Exact, case-sensitive match; anything else is treated as raw.
+	 * NOTE(review): RFC 2616 3.5 makes these names case-insensitive.
+	 */
+	return (!strcmp(p, "gzip") ? ENCODING_GZIP :
+	    !strcmp(p, "deflate") ? ENCODING_DEFLATE :
+	    !strcmp(p, "compress") ? ENCODING_COMPRESS : ENCODING_RAW);
+}
+
+/*
  * Parse a content-length header
  */
 static int
@@ -800,14 +818,17 @@
 	conn_t *conn;
 	struct url *url, *new;
 	int chunked, direct, need_auth, noredirect, verbose;
-	int e, i, n, val;
+	int e, i, n, val, encoding;
 	off_t offset, clength, length, size;
 	time_t mtime;
 	const char *p;
-	FILE *f;
+	FILE *f, *d;
 	hdr_t h;
 	char hbuf[MAXHOSTNAMELEN + 7], *host;
 
+	f = NULL;
+	d = NULL;
+
 	direct = CHECK_FLAG('d');
 	noredirect = CHECK_FLAG('A');
 	verbose = CHECK_FLAG('v');
@@ -834,6 +855,7 @@
 		length = -1;
 		size = -1;
 		mtime = 0;
+		encoding = ENCODING_RAW;
 
 		/* check port */
 		if (!url->port)
@@ -919,6 +941,7 @@
 			http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname());
 		if (url->offset > 0)
 			http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset);
+		http_cmd(conn, "Accept-Encoding: gzip,deflate");
 		http_cmd(conn, "Connection: close");
 		http_cmd(conn, "");
 
@@ -999,6 +1022,9 @@
 			case hdr_error:
 				http_seterr(HTTP_PROTOCOL_ERROR);
 				goto ouch;
+			case hdr_content_encoding:
+				encoding = http_parse_encoding(p);
+				break;
 			case hdr_content_length:
 				http_parse_length(p, &clength);
 				break;
@@ -1119,7 +1145,9 @@
 
 	/* fill in stats */
 	if (us) {
-		us->size = size;
+		/* we can only predict the size of unencoded streams */
+		if (encoding == ENCODING_RAW)
+			us->size = size;
 		us->atime = us->mtime = mtime;
 	}
 
@@ -1139,6 +1167,13 @@
 		goto ouch;
 	}
 
+	/* wrap the decoder around it */
+	if ((d = httpDecode(f, encoding, size)) == NULL) {
+		fetch_syserr();
+		fclose(f);
+		goto ouch;
+	}
+
 	if (url != URL)
 		fetchFreeURL(url);
 	if (purl)
@@ -1150,7 +1185,7 @@
 		f = NULL;
 	}
 
-	return (f);
+	return (d);
 
 ouch:
 	if (url != URL)
diff -Pur src/lib/libfetch.orig/httpdecode.c src/lib/libfetch/httpdecode.c
--- src/lib/libfetch.orig/httpdecode.c	1970-01-01 01:00:00.000000000 +0100
+++ src/lib/libfetch/httpdecode.c	2008-07-07 00:56:36.000000000 +0200
@@ -0,0 +1,411 @@
+/*
+ * I wrote this and I say you can do whatever you want with it. Period.
+ * However, I'd love to hear from you what you've done.
+ *
+ * Dominic Fandrey <kamikaze at bsdforen.de>
+ */
+
+/**
+ * \file httpdecode.c
+ *
+ * This file contains the implementation of the prototypes defined in
+ * httpdecode.h.
+ *
+ * @brief
+ *	HTTP content decoding implementation.
+ * @see
+ *	httpdecode.h
+ * @author
+ *	Dominic Fandrey <kamikaze at bsdforen.de>
+ * @version
+ *	0.1.99.2008.07.07
+ */
+
+/* LINTLIBRARY */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <zlib.h>
+#include "httpdecode.h"
+
+/* PRIVATE STRUCTS */
+
+/**
+ * @brief
+ *	The necessary data to maintain a zlib decoding stream.
+ */
+struct zlibStream {
+	/**
+	 * @brief
+	 *	The original stream with the encoded data.
+	 */
+	FILE * source;
+
+	/**
+	 * @brief
+	 *	A read buffer for the encoded stream.
+	 */
+	char * buffer;
+
+	/**
+	 * This specifies the encoding of the data. The values
+	 * ENCODING_GZIP and ENCODING_DEFLATE are possible.
+	 *
+	 * @brief
+	 *	The encoding type of the stream.
+	 */
+	int encoding;
+
+	/**
+	 * @brief
+	 *	The stream data used by zlib.
+	 */
+	z_stream stream;
+
+	/**
+	 * The length of the source stream; zlibRead() reads at most this many
+	 * bytes from source, which prevents overreading and allows the
+	 * continued use of the underlying HTTP stream. The value 0 is meant
+	 * to denote an unknown length (verify that zlibRead() honours this).
+	 *
+	 * @brief
+	 *	The length of the encoded source stream.
+	 */
+	size_t length;
+
+	/**
+	 * @brief
+	 *	The number of bytes read from the source stream so far.
+	 */
+	size_t read;
+
+	/**
+	 * @brief
+	 *	The size of the buffer for encoded data.
+	 */
+	size_t bufferSize;
+
+	/**
+	 * @brief
+	 *	The number of not yet decoded bytes held in the buffer.
+	 */
+	size_t bufferUsed;
+};
+
+/* PRIVATE PROTOTYPES */
+void moveBuffer(struct zlibStream * cookie, char * newBuffer, size_t size);
+FILE * zlibOpen(struct zlibStream * cookie);
+size_t zlibRead(struct zlibStream * cookie, char * buffer, size_t length);
+int zlibClose(struct zlibStream * cookie);
+/* TODO
+FILE * compressOpen(struct zlibStream * cookie);
+size_t compressRead(struct zlibStream * cookie, char * buffer, size_t length);
+int compressClose(struct zlibStream * cookie);
+FILE * randomOpen(struct zlibStream * cookie);
+size_t randomRead(struct zlibStream * cookie, char * buffer, size_t length);
+int randomSeek(struct zlibStream * cookie, off_t offset, int whence);
+int randomClose(struct zlibStream * cookie);
+*/
+
+/* PUBLIC FUNCTIONS */
+
+/**
+ * Opens a given stream for decoding and returns a FILE handle, created with
+ * funopen(3), for fread(3) and fclose(3). ENCODING_RAW returns source itself.
+ * On failure NULL is returned and errno is set to EINVAL for an unsupported
+ * encoding or to ENOMEM for insufficient memory.
+ *
+ * @brief
+ *	Open a FILE stream to read decoded data from.
+ * @param source
+ *	The stream to read the encoded data from.
+ * @param encoding
+ *	The encoding type of the source stream (one of ENCODING_*).
+ * @param length
+ *	The length of the encoded source stream (0 is meant as unknown).
+ * @return
+ *	A FILE handle to read the decoded data from, or NULL on failure.
+ */
+FILE * httpDecode(FILE * source, int encoding, size_t length) {
+	struct zlibStream * zlibCookie;
+	FILE * decoded;
+	int savedErrno;
+
+	switch (encoding) {
+	case ENCODING_RAW:
+		return(source);
+	case ENCODING_GZIP: case ENCODING_DEFLATE:
+		zlibCookie = malloc(sizeof(struct zlibStream));
+		if (zlibCookie == NULL) /* errno == ENOMEM */
+			return(NULL);
+		zlibCookie->buffer = NULL;
+		zlibCookie->bufferSize = 0;
+		zlibCookie->source = source;
+		zlibCookie->length = length;
+		zlibCookie->read = 0;
+		zlibCookie->encoding = encoding;
+		if ((decoded = zlibOpen(zlibCookie)) == NULL) {
+			/* The cookie has no other owner; keep zlibOpen's errno. */
+			savedErrno = errno;
+			free(zlibCookie);
+			errno = savedErrno;
+		}
+		return(decoded);
+	}
+
+	/* ENCODING_COMPRESS and unknown encodings are not supported. */
+	errno = EINVAL;
+	return(NULL);
+}
+
+/**
+ * This function is a wrapper around httpDecode that allows random access
+ * by writing the stream into a temporary file. The file is buffered
+ * by a given number of buffers in memory.
+ * Buffers are overwritten in LRU order.
+ *
+ * @brief
+ *	A file backed wrapper around httpDecode for random access.
+ * @param source
+ *	The stream to read the encoded data from.
+ * @param encoding
+ *	The encoding type of the source stream.
+ * @param length
+ *	The length of the source stream. Use 0 if unknown.
+ * @param bufferSize
+ *	The size of a buffer.
+ * @param buffers
+ *	The number of buffers.
+ * @return
+ *	Returns a FILE handle to read an encoded stream.
+ */
+/* TODO
+FILE * httpDecodeRandom(FILE * source, int encoding, size_t length,
+	size_t bufferSize, size_t buffers) {
+	return(source);
+}
+*/
+
+/* PRIVATE FUNCTIONS */
+
+/**
+ * Installs newBuffer as the read buffer of the cookie. The bytes held in the
+ * old buffer are copied to the new one before the old buffer is freed.
+ * Nothing is checked: newBuffer must hold at least cookie->bufferUsed bytes.
+ *
+ * @brief
+ *	Replace the current read buffer.
+ * @param cookie
+ *	Contains all the data necessary to maintain the stream.
+ * @param newBuffer
+ *	The new buffer to use; size is recorded as its capacity.
+ */
+void moveBuffer(struct zlibStream * cookie, char * newBuffer, size_t size) {
+	char * oldBuffer = cookie->buffer;
+	memmove(newBuffer, oldBuffer, cookie->bufferUsed);
+	cookie->bufferSize = size;
+	cookie->buffer = newBuffer;
+	free(oldBuffer);
+}
+
+/**
+ * Initializes the zlib stream of the cookie, allocates the read buffer and
+ * creates the FILE handle that is later used to pull decoded data.
+ *
+ * On failure errno is EINVAL (unsupported encoding, or zlib rejected the
+ * setup), ENOMEM (no memory for the read buffer or zlib) or whatever
+ * funopen set. Everything allocated here is released again; the cookie
+ * itself remains owned by the caller.
+ *
+ * @brief
+ *	Open a zlib stream.
+ * @param cookie
+ *	Contains all the data necessary to maintain the stream.
+ * @return
+ *	A FILE* pointer or NULL in case of failure.
+ */
+FILE * zlibOpen(struct zlibStream * cookie) {
+	int wbits, zlibStatus, savedErrno;
+	FILE * decoded;
+	z_stream * stream = &(cookie->stream);
+
+	/* Set window bits for the selected encoding. */
+	switch(cookie->encoding) {
+	case ENCODING_DEFLATE:
+		wbits = -MAX_WBITS; /* Negative: raw deflate data. */
+		break;
+	case ENCODING_GZIP:
+		wbits = MAX_WBITS + 16; /* +16: gzip header and trailer. */
+		break;
+	default:
+		errno = EINVAL;
+		return(NULL);
+	}
+
+	/* Create the decoding buffer. */
+	cookie->bufferSize = 32768; /* Zlib breaks with a smaller buffer. */
+	cookie->bufferUsed = 0;
+	cookie->buffer = malloc(cookie->bufferSize);
+	if (cookie->buffer == NULL)
+		return(NULL); /* errno == ENOMEM */
+
+	/* Initialize zlib stream data. */
+	stream->zalloc = Z_NULL;
+	stream->zfree = Z_NULL;
+	stream->opaque = Z_NULL;
+	stream->avail_in = 0;
+	stream->next_in = (Bytef *) cookie->buffer;
+
+	/* Initialize stream for decoding; anything but Z_OK is a failure. */
+	zlibStatus = inflateInit2(stream, wbits);
+	if (zlibStatus != Z_OK) {
+		free(cookie->buffer);
+		cookie->buffer = NULL;
+		errno = (zlibStatus == Z_MEM_ERROR ? ENOMEM : EINVAL);
+		return(NULL);
+	}
+
+	/* Create the file stream to return. */
+	decoded = funopen(cookie,(int (*)(void *, char *, int)) zlibRead,
+		NULL, NULL, (int (*)(void *)) zlibClose);
+	if (decoded == NULL) {
+		/* Undo the setup, but keep the errno set by funopen. */
+		savedErrno = errno;
+		inflateEnd(stream);
+		free(cookie->buffer);
+		cookie->buffer = NULL;
+		errno = savedErrno;
+	}
+	return(decoded);
+}
+
+/**
+ * Writes a chunk of decoded data to the given buffer. At most cookie->length
+ * bytes are read from the source; a length of 0 means unknown (no limit).
+ *
+ * In case of an error (size_t) -1 is returned to indicate to the funopen
+ * wrapper that an error occurred. errno is set to EIO for corrupt or
+ * prematurely ending data and to ENOMEM if zlib ran out of memory; read
+ * errors of the source stream are passed on with errno left untouched.
+ *
+ * An error does not cause the stream to be closed.
+ *
+ * @brief
+ *	Read decoded data from the encoded stream.
+ * @param cookie
+ *	Contains all the data necessary to maintain the stream.
+ * @param buffer
+ *	The buffer to write the decoded data to.
+ * @param length
+ *	The space available in the buffer.
+ * @return
+ *	Bytes written to the buffer, 0 at end of stream, (size_t) -1 on failure.
+ */
+size_t zlibRead(struct zlibStream * cookie, char * buffer, size_t length) {
+	char * tmpBuffer;
+	size_t growth, maxRead, bufferAvailable, flushed;
+	int zlibStatus;
+	z_stream * stream = &(cookie->stream);
+
+	/* Grow the read buffer if the target buffer is over twice its size. */
+	if ((length >> 1) > cookie->bufferSize) {
+		tmpBuffer = malloc(length >> 1);
+		/* If the allocation fails, pretend never to have tried. */
+		if (tmpBuffer == NULL)
+			errno = 0;
+		else
+			/* Move data from the old buffer to the new one. */
+			moveBuffer(cookie, tmpBuffer, length >> 1);
+	}
+
+	/* Run until the target buffer has been filled. */
+	flushed = 0;
+	while (length > 0) {
+		/* If the input buffer is not full, fill it. */
+		bufferAvailable = cookie->bufferSize - cookie->bufferUsed;
+		/* A known source length also limits how much may be read. */
+		if (cookie->length != 0) {
+			maxRead = cookie->length - cookie->read;
+			if (maxRead < bufferAvailable)
+				bufferAvailable = maxRead;
+		}
+		if (bufferAvailable > 0) {
+			/* Append behind the input that is still undecoded. */
+			growth = fread(cookie->buffer + cookie->bufferUsed,
+				sizeof(char), bufferAvailable, cookie->source);
+			/* Forward errors. */
+			if (ferror(cookie->source))
+				return((size_t) -1);
+			cookie->bufferUsed += growth;
+			cookie->read += growth;
+		}
+
+		/* Decode data from the read to the target buffer. */
+		stream->avail_in = cookie->bufferUsed;
+		stream->next_in = (Bytef *) cookie->buffer;
+		stream->avail_out = length;
+		stream->next_out = (Bytef *) buffer;
+		zlibStatus = inflate(stream, Z_SYNC_FLUSH);
+
+		/* The amount of data just written to the target buffer. */
+		growth = length - stream->avail_out;
+
+		/* Move the input that is still undecoded to the front. */
+		memmove(cookie->buffer, stream->next_in,
+			(size_t) stream->avail_in);
+		cookie->bufferUsed = stream->avail_in;
+		stream->next_in = (Bytef *) cookie->buffer;
+
+		/* Adjust the target buffer. */
+		flushed += growth;
+		buffer += growth;
+		length = stream->avail_out;
+
+		/* Deal with errors. */
+		switch (zlibStatus) {
+		case Z_OK:
+			break;
+		case Z_STREAM_END:
+			length = 0;
+			break;
+		case Z_BUF_ERROR:
+			/*
+			 * No progress was possible because the source ran dry.
+			 * Hand out what has been decoded so far; if that is
+			 * nothing the data ended prematurely, which is an error.
+			 */
+			if (flushed > 0)
+				return(flushed);
+			/* FALLTHROUGH */
+		case Z_NEED_DICT: case Z_DATA_ERROR: case Z_STREAM_ERROR:
+			errno = EIO;
+			return((size_t) -1);
+		case Z_MEM_ERROR:
+			errno = ENOMEM;
+			return((size_t) -1);
+		}
+	}
+
+	return(flushed);
+}
+
+/**
+ * Ends inflation, frees the read buffer and the cookie; source stays open.
+ * @brief
+ *	Closes the decoding stream.
+ * @param cookie
+ *	Contains all the data necessary to maintain the stream.
+ * @return
+ *	Always 0 for success.
+ */
+int zlibClose(struct zlibStream * cookie) {
+	char * readBuffer = cookie->buffer;
+	(void) inflateEnd(&cookie->stream);
+	free(cookie);
+	free(readBuffer);
+	return(0);
+}
+
diff -Pur src/lib/libfetch.orig/httpdecode.h src/lib/libfetch/httpdecode.h
--- src/lib/libfetch.orig/httpdecode.h	1970-01-01 01:00:00.000000000 +0100
+++ src/lib/libfetch/httpdecode.h	2008-07-07 00:56:36.000000000 +0200
@@ -0,0 +1,116 @@
+/*
+ * I wrote this and I say you can do whatever you want with it. Period.
+ * However, I'd love to hear from you what you've done.
+ *
+ * Dominic Fandrey <kamikaze at bsdforen.de>
+ */
+
+#ifndef HTTPDECODE_H
+#define HTTPDECODE_H
+
+/**
+ * \file httpdecode.h
+ *
+ * This file contains the public prototypes and defines required to read
+ * compressed data streams. Supported formats are those listed in
+ * RFC2616 section 3.5 (HTTP 1.1 content encodings). Compress decoding is
+ * not yet implemented.
+ *
+ * @brief
+ *	Public defines and prototypes to decode encoded HTTP streams.
+ * @see
+ *	http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.5
+ * @see
+ *	httpdecode.c
+ * @author
+ *	Dominic Fandrey <kamikaze at bsdforen.de>
+ * @version
+ *	0.3.99.2008.07.07
+ */
+
+/**
+ * This can be used to abuse httpDecodeRandom as a random (read) access layer
+ * for any FILE stream.
+ *
+ * @brief
+ *	The source stream is not encoded.
+ */
+#define ENCODING_RAW		0
+
+/**
+ * @brief
+ *	The source stream is deflate encoded.
+ * @see
+ *	zlib(3)
+ */
+#define ENCODING_DEFLATE	1
+
+/**
+ * @brief
+ *	The source stream is gzip encoded.
+ * @see
+ *	gzip(1)
+ * @see
+ *	zlib(3)
+ */
+#define ENCODING_GZIP		2
+
+/**
+ * @brief
+ *	The source stream is compress encoded.
+ * @see
+ *	compress(1)
+ */
+#define ENCODING_COMPRESS	3
+
+
+/**
+ * Opens a given stream for decoding and returns a FILE handle that can be
+ * used with the read and close function. Internally funopen is used
+ * to achieve this.
+ *
+ * @param source
+ *	The stream to read the encoded data from.
+ * @param encoding
+ *	The encoding type of the source stream.
+ * @param length
+ *	The length of the source stream. Use 0 if unknown.
+ * @return
+ *	Returns a FILE handle to read an encoded stream.
+ * @see
+ *	funopen(3)
+ * @see
+ *	fread(3)
+ * @see
+ *	fclose(3)
+ */
+FILE * httpDecode(FILE * source, int encoding, size_t length);
+
+/**
+ * This function is a wrapper around httpDecode that allows random access
+ * by writing the stream into a temporary file. The file is buffered
+ * by a given number of buffers in memory.
+ * Buffers are overwritten in LRU order.
+ *
+ * @brief
+ *	A file backed wrapper around httpDecode for random access.
+ * @param source
+ *	The stream to read the encoded data from.
+ * @param encoding
+ *	The encoding type of the source stream.
+ * @param length
+ *	The length of the source stream. Use 0 if unknown.
+ * @param bufferSize
+ *	The size of a buffer.
+ * @param buffers
+ *	The number of buffers.
+ * @return
+ *	Returns a FILE handle to read an encoded stream.
+ */
+/* TODO
+FILE * httpDecodeRandom(FILE * source, int encoding, size_t length,
+	size_t bufferSize, size_t buffers);
+*/
+
+#endif /* HTTPDECODE_H */
+


>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-bugs mailing list