Tar output mode for installworld

Tim Kientzle kientzle at freebsd.org
Sun Jul 15 06:28:14 UTC 2007


>>> This is easy to implement ... just build
>>> a description of the final archive in a nice verbose
>>> text format such as:
>>
>> ...which is done by NetBSD for the unprivileged release building via
>> build.sh. Anyone interested in working on this should possibly have a
>> look there.

Here's a rough implementation of my core idea.  Add the
attached archive_read_support_format_ntree.c to libarchive
and patch archive_read_support_format_all.c, then
rebuild libarchive and bsdtar.  You'll then be able
to read, extract, etc, a format I'm calling "ntree"
for now. This is similar to NetBSD's "metalog" format, except:

1) First line must be "#%ntree".  This is used as a file signature.

2) Blank lines and lines beginning with '#' are ignored.

3) All other lines have the following format:

<filename> <key>=<value> <key>=<value> ...

Where key is one of:
   time:  decimal seconds since beginning of epoch
   gid,uid: decimal group/user ID
   gname,uname: textual group/user name
   mode: octal
   type: as in mtree, defaults to 'file'
   content: name of a file on disk

E.g.,

#%ntree
bin/echo uid=0 gid=0 gname=wheel content=my/bin/echo

I think this should form a reasonable basis against which
to implement tar output for installworld.  I would actually
suggest building the specification file at buildworld time, not
at installworld time.  You could then create a tarball with
   tar -czf system.tgz @specification.ntree
or install directly from the specification file using
   tar -xvpf specification.ntree -C ${DESTDIR}

Some work still remains:
   * Should allow multiple (consecutive) lines for a single file.
   * Need to support more keys, especially "flags" and "link".
   * Need to find a way to encode hardlinks.
   * Need to decide how/whether to reconcile this with mtree.  (This 
could be extended to read regular mtree files, though it's unclear how 
to auto-detect the standard mtree format.)
   * Need to implement a test suite for this format and add it to 
libarchive_test.
   * Should be able to write these files.  (A libarchive writer
could even accumulate various hashes and include them, though
the body per se would be lost.)

Feedback appreciated,

Tim Kientzle

-------------- next part --------------
A non-text attachment was scrubbed...
Name: archive_read_support_format_all.patch
Type: text/x-patch
Size: 493 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-hackers/attachments/20070715/e271b84b/archive_read_support_format_all-0001.bin
-------------- next part --------------
/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD$");

#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#include <stddef.h>
/* #include <stdint.h> */ /* See archive_platform.h */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "archive.h"
#include "archive_entry.h"
#include "archive_private.h"
#include "archive_read_private.h"

/*
 * Per-archive state for the ntree reader.  One instance is allocated
 * when the format is registered and released by cleanup().
 */
struct ntree {
	/* Accumulates the specification line currently being parsed. */
	struct archive_string	 line;
	/* Size of 'buff'; set when the buffer is first allocated. */
	size_t			 buffsize;
	/* Lazily allocated buffer used to read the contents file. */
	char			*buff;
	/* Running count of bytes handed out by read_data(). */
	off_t			 offset;
	/* Descriptor of the open contents file, or -1 if none. */
	int			 fd;
	/* Cached result of bid(), or -1 if not computed yet. */
	int			 bid;
	/* AE_IF* file type of the current entry (AE_IFREG by default). */
	int			 filetype;
};

/*
 * Forward declarations.  bid, read_header, read_data, skip and cleanup
 * are the callbacks handed to __archive_read_register_format(); the
 * remaining functions are helpers private to this file.
 */
static int	bid(struct archive_read *);
static int	cleanup(struct archive_read *);
static int	parse_setting(struct archive_read *, struct ntree *,
		    struct archive_entry *, char *, char *);
static int	read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static ssize_t	readline(struct archive_read *, struct ntree *, char **);
static int	skip(struct archive_read *a);
static int	read_header(struct archive_read *,
		    struct archive_entry *);
/* Locale-independent decimal and octal integer parsers. */
static int64_t	ntree_atol10(char **);
static int64_t	ntree_atol8(char **);

/*
 * Register the "ntree" reader with an archive read handle.
 *
 * Allocates zeroed per-format state, marks the bid as not yet computed
 * and the contents descriptor as closed, then passes the state and the
 * format callbacks to __archive_read_register_format().  If that call
 * does not return ARCHIVE_OK the state is released again.
 *
 * Returns ARCHIVE_FATAL when the state cannot be allocated; otherwise
 * ARCHIVE_OK, whatever the registration call returned.
 */
int
archive_read_support_format_ntree(struct archive *_a)
{
	struct archive_read *a = (struct archive_read *)_a;
	struct ntree *state;
	int status;

	state = (struct ntree *)calloc(1, sizeof(*state));
	if (state == NULL) {
		archive_set_error(&a->archive, ENOMEM,
		    "Can't allocate ntree data");
		return (ARCHIVE_FATAL);
	}
	state->fd = -1;
	state->bid = -1;

	status = __archive_read_register_format(a, state,
	    bid, read_header, read_data, skip, cleanup);
	if (status == ARCHIVE_OK)
		return (ARCHIVE_OK);

	/* Registration did not take ownership of the state. */
	free(state);
	return (ARCHIVE_OK);
}

/*
 * Release everything owned by the ntree reader: the contents file
 * descriptor (if one is still open), the line buffer, the data buffer
 * and the state structure itself.  Always returns ARCHIVE_OK.
 */
static int
cleanup(struct archive_read *a)
{
	struct ntree *ntree;

	ntree = (struct ntree *)(a->format->data);
	/*
	 * The contents file stays open between read_header() and the
	 * next read_header()/skip(); close it here so that tearing the
	 * reader down mid-entry does not leak the descriptor.
	 */
	if (ntree->fd >= 0) {
		close(ntree->fd);
		ntree->fd = -1;
	}
	archive_string_free(&ntree->line);
	free(ntree->buff);
	free(ntree);
	(a->format->data) = NULL;
	return (ARCHIVE_OK);
}


/*
 * Decide whether the input looks like an ntree specification.
 *
 * The input must start with the signature "#%ntree".  A full match
 * bids 8 points per signature byte; anything else (including input
 * that is shorter than the signature or a read error) bids 0.  The
 * result is cached in ntree->bid so the input is only examined once.
 */
static int
bid(struct archive_read *a)
{
	static const char signature[] = "#%ntree";
	const size_t siglen = sizeof(signature) - 1;
	struct ntree *ntree;
	ssize_t bytes_read;
	const void *h;

	ntree = (struct ntree *)(a->format->data);
	if (ntree->bid != -1)
		return (ntree->bid);

	/* Now let's look at the actual header and see if it matches. */
	h = NULL;
	bytes_read = (a->decompressor->read_ahead)(a, &h, siglen);

	/*
	 * Never look past the bytes actually made available: a short
	 * read (or an error) cannot contain the whole signature.
	 */
	if (h == NULL || bytes_read < (ssize_t)siglen)
		return (ntree->bid = 0);
	if (memcmp(h, signature, siglen) != 0)
		return (ntree->bid = 0);

	return (ntree->bid = 8 * (int)siglen);
}

/*
 * Read the next specification line and fill in 'entry' from it.
 *
 * Comment lines (first character '#') and blank lines are skipped.
 * The first whitespace-delimited word of the line is the pathname;
 * every following word is passed to parse_setting() as a key=value
 * attribute.
 *
 * Returns ARCHIVE_EOF when no more lines are available, ARCHIVE_FATAL
 * if a line cannot be read, otherwise ARCHIVE_OK or the status of the
 * last attribute that did not parse cleanly.
 */
static int
read_header(struct archive_read *a, struct archive_entry *entry)
{
	struct ntree *ntree;
	char *p, *q, *end;
	ssize_t len;
	int r = ARCHIVE_OK, r1;

	ntree = (struct ntree *)(a->format->data);
	ntree->filetype = AE_IFREG;
	/* The body of every entry starts at offset zero. */
	ntree->offset = 0;

	/* Drop the contents file of the previous entry, if any. */
	if (ntree->fd >= 0) {
		close(ntree->fd);
		ntree->fd = -1;
	}

	/* Find the next line that is neither a comment nor blank. */
	p = NULL;
	end = NULL;
	for (;;) {
		len = readline(a, ntree, &p);
		if (len < 0)
			return (ARCHIVE_FATAL);
		if (len == 0)
			return (ARCHIVE_EOF);
		if (p[0] == '#')
			continue;
		end = p + len;
		for (q = p; q < end; q++)
			if (*q != ' ' && *q != '\t' && *q != '\n')
				break;
		if (q < end)
			break;	/* Found something other than whitespace. */
	}

	/* Null-terminate each component. */
	/* TODO: Allow spaces within filenames by using quotes. */
	for (q = p; q < end; q++)
		if (*q == ' ' || *q == '\t' || *q == '\n')
			*q = '\0';

	archive_entry_copy_pathname(entry, p);

	/*
	 * Walk the remaining NUL-separated words.  Empty words (from
	 * runs of whitespace) are passed through and ignored by
	 * parse_setting().
	 */
	p += strlen(p);
	while (p < end) {
		q = p + strlen(p);
		r1 = parse_setting(a, ntree, entry, p, q);
		if (r1 != ARCHIVE_OK)
			r = r1;
		p = q + 1;
	}
	return r;
}

/*
 * Apply a single "key=value" word to 'entry'.
 *
 * 'key' points at the NUL-terminated word and 'end' at its terminating
 * NUL; an empty word (end == key) is silently accepted.  The '=' is
 * overwritten with NUL so key and value become separate strings.
 *
 * Recognized keys: content, gid, gname, mode, type, time, uid, uname.
 * The outer switch dispatches on the first letter of the key; a key
 * that shares a first letter with a known key but does not match it
 * deliberately falls through the remaining cases to the "unrecognized
 * key" error in the default branch.
 *
 * Returns ARCHIVE_OK on success and ARCHIVE_WARN (with the archive
 * error set) for malformed, unsupported or unrecognized attributes.
 */
static int
parse_setting(struct archive_read *a, struct ntree *ntree, struct archive_entry *entry, char *key, char *end)
{
	struct stat st;
	char *val;


	if (end == key)
		return (ARCHIVE_OK);

	val = strchr(key, '=');
	if (val == NULL) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Malformed attribute \"%s\" (%d)", key, key[0]);
		return (ARCHIVE_WARN);
	}

	*val = '\0';
	++val;

	switch (key[0]) {
	case 'c':
		if (strcmp(key, "content") == 0) {
			/* A repeated key must not leak the earlier file. */
			if (ntree->fd >= 0) {
				close(ntree->fd);
				ntree->fd = -1;
			}
			ntree->fd = open(val, O_RDONLY);
			if (ntree->fd < 0) {
				archive_set_error(&a->archive, errno,
				    "Can't open \"%s\"", val);
				return (ARCHIVE_WARN);
			}
			/* Only trust 'st' if fstat() actually filled it in. */
			if (fstat(ntree->fd, &st) != 0) {
				archive_set_error(&a->archive, errno,
				    "Can't stat \"%s\"", val);
				close(ntree->fd);
				ntree->fd = -1;
				return (ARCHIVE_WARN);
			}
			archive_entry_set_size(entry, st.st_size);
			break;
		}
		/* FALLTHROUGH */
	case 'g':
		if (strcmp(key, "gid") == 0) {
			archive_entry_set_gid(entry, ntree_atol10(&val));
			break;
		}
		if (strcmp(key, "gname") == 0) {
			archive_entry_copy_gname(entry, val);
			break;
		}
		/* FALLTHROUGH */
	case 'm':
		if (strcmp(key, "mode") == 0) {
			/* Numeric modes are octal; anything else is symbolic. */
			if (val[0] < '0' || val[0] > '7') {
				archive_set_error(&a->archive,
				    ARCHIVE_ERRNO_FILE_FORMAT,
				    "Symbolic mode \"%s\" unsupported", val);
				return (ARCHIVE_WARN);
			}
			archive_entry_set_mode(entry,
			    ntree->filetype | (07777 & ntree_atol8(&val)));
			break;
		}
		/* FALLTHROUGH */
	case 't':
		if (strcmp(key, "type") == 0) {
			/*
			 * Same first-letter dispatch as the outer switch:
			 * a non-matching value falls through to 'default'.
			 */
			switch (val[0]) {
			case 'b':
				if (strcmp(val, "block") == 0) {
					ntree->filetype = AE_IFBLK;
					break;
				}
				/* FALLTHROUGH */
			case 'c':
				if (strcmp(val, "char") == 0) {
					ntree->filetype = AE_IFCHR;
					break;
				}
				/* FALLTHROUGH */
			case 'd':
				if (strcmp(val, "dir") == 0) {
					ntree->filetype = AE_IFDIR;
					break;
				}
				/* FALLTHROUGH */
			case 'f':
				if (strcmp(val, "fifo") == 0) {
					ntree->filetype = AE_IFIFO;
					break;
				}
				if (strcmp(val, "file") == 0) {
					ntree->filetype = AE_IFREG;
					break;
				}
				/* FALLTHROUGH */
			case 'l':
				if (strcmp(val, "link") == 0) {
					ntree->filetype = AE_IFLNK;
					break;
				}
				/* FALLTHROUGH */
			default:
				archive_set_error(&a->archive,
				    ARCHIVE_ERRNO_FILE_FORMAT,
				    "Unrecognized file type \"%s\"", val);
				return (ARCHIVE_WARN);
			}
			archive_entry_set_filetype(entry, ntree->filetype);
			break;
		}
		if (strcmp(key, "time") == 0) {
			archive_entry_set_mtime(entry, ntree_atol10(&val), 0);
			break;
		}
		/* FALLTHROUGH */
	case 'u':
		if (strcmp(key, "uid") == 0) {
			archive_entry_set_uid(entry, ntree_atol10(&val));
			break;
		}
		if (strcmp(key, "uname") == 0) {
			archive_entry_copy_uname(entry, val);
			break;
		}
		/* FALLTHROUGH */
	default:
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Unrecognized key %s=%s", key, val);
		return (ARCHIVE_WARN);
	}
	return (ARCHIVE_OK);
}

static int
read_data(struct archive_read *a, const void **buff, size_t *size, off_t *offset)
{
	ssize_t bytes_read;
	struct ntree *ntree;

	ntree = (struct ntree *)(a->format->data);
	if (ntree->fd < 0) {
		*buff = NULL;
		*offset = 0;
		*size = 0;
		return (ARCHIVE_EOF);
	}
	if (ntree->buff == NULL) {
		ntree->buffsize = 64 * 1024;
		ntree->buff = malloc(ntree->buffsize);
		if (ntree->buff == NULL) {
			archive_set_error(&a->archive, ENOMEM,
			    "Can't allocate memory");
		}
	}

	*buff = ntree->buff;
	*offset = ntree->offset;
	bytes_read = read(ntree->fd, ntree->buff, ntree->buffsize);
	if (bytes_read < 0) {
		archive_set_error(&a->archive, errno, "Can't read");
		return (ARCHIVE_WARN);
	}
	if (bytes_read == 0) {
		*size = 0;
		return (ARCHIVE_EOF);
	}
	ntree->offset += bytes_read;
	*size = (size_t)bytes_read;
	return (ARCHIVE_OK);
}

/*
 * Skipping an entry requires no work on the input itself; the only
 * thing to do is close the contents file if one is open.
 * Always returns ARCHIVE_OK.
 */
static int
skip(struct archive_read *a)
{
	struct ntree *state = (struct ntree *)(a->format->data);

	if (state->fd < 0)
		return (ARCHIVE_OK);

	close(state->fd);
	state->fd = -1;
	return (ARCHIVE_OK);
}

/*
 * Parse an unsigned octal number starting at *p, advancing *p past
 * every digit consumed.  Parsing stops at the first character that is
 * not an octal digit.  If the value would exceed INT64_MAX the result
 * saturates at INT64_MAX and *p is left on the digit that overflowed.
 *
 * This is intentionally locale-independent, which is why strtol()
 * (which honours the locale) is not a drop-in replacement.
 */
static int64_t
ntree_atol8(char **p)
{
	const int radix = 8;
	const int64_t max_prefix = INT64_MAX / radix;
	const int64_t max_last = INT64_MAX % radix;
	int64_t value = 0;
	int d;

	for (d = **p - '0'; d >= 0 && d < radix; d = *++(*p) - '0') {
		/* Would appending this digit overflow? */
		if (value > max_prefix ||
		    (value == max_prefix && d > max_last))
			return (INT64_MAX);
		value = value * radix + d;
	}
	return (value);
}

/*
 * Parse a decimal number with an optional leading '-' starting at *p,
 * advancing *p past every character consumed.  Parsing stops at the
 * first character that is not a decimal digit.  If the magnitude would
 * exceed INT64_MAX it saturates at INT64_MAX (so the result is
 * INT64_MAX or -INT64_MAX) and *p is left on the digit that
 * overflowed.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
ntree_atol10(char **p)
{
	int64_t l, limit, last_digit_limit;
	int base, digit, sign;

	base = 10;
	limit = INT64_MAX / base;
	last_digit_limit = INT64_MAX % base;

	if (**p == '-') {
		sign = -1;
		++(*p);
	} else
		sign = 1;

	l = 0;
	digit = **p - '0';
	while (digit >= 0 && digit < base) {
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			/*
			 * Truncate on overflow.  The result type is signed,
			 * so the ceiling is INT64_MAX; UINT64_MAX does not
			 * fit and would come out as -1.
			 */
			l = INT64_MAX;
			break;
		}
		l = (l * base) + digit;
		digit = *++(*p) - '0';
	}
	return (sign < 0) ? -l : l;
}

/*
 * Read one line of the specification into ntree->line.
 *
 * Returns the length of the line (including the trailing newline when
 * the line has one), 0 at end of input, or negative on error.  On a
 * positive return 'start' is updated to point to the first character
 * of the line, and the byte following the line is always NUL so the
 * caller can treat the tail of the buffer as a C string.
 *
 * A final line that is not newline-terminated is returned as-is
 * rather than being discarded.
 */
static ssize_t
readline(struct archive_read *a, struct ntree *ntree, char **start)
{
	ssize_t bytes_read;
	ssize_t total_size = 0;
	const void *t;
	const char *nl;

	/* Accumulate line in a line buffer. */
	for (;;) {
		/* Read some more. */
		bytes_read = (a->decompressor->read_ahead)(a, &t, 1);
		if (bytes_read < 0)
			return (ARCHIVE_FATAL);
		if (bytes_read == 0) {
			/* End of input: nothing pending means we're done. */
			if (total_size == 0)
				return (0);
			/* Otherwise hand back the unterminated last line. */
			*start = ntree->line.s;
			return (total_size);
		}
		nl = memchr(t, '\n', bytes_read);
		/* If we found '\n', trim the read. */
		if (nl != NULL)
			bytes_read = 1 + (nl - (const char *)t);
		/* Reserve one extra byte for the NUL terminator. */
		if (archive_string_ensure(&ntree->line,
			total_size + bytes_read + 1) == NULL) {
			archive_set_error(&a->archive, ENOMEM,
			    "Can't allocate working buffer");
			return (ARCHIVE_FATAL);
		}
		memcpy(ntree->line.s + total_size, t, bytes_read);
		(a->decompressor->consume)(a, bytes_read);
		total_size += bytes_read;
		ntree->line.s[total_size] = '\0';
		/* If we found '\n', clean up and return. */
		if (nl != NULL) {
			*start = ntree->line.s;
			return (total_size);
		}
	}
}


More information about the freebsd-hackers mailing list