svn commit: r201170 - head/lib/libarchive

Tim Kientzle kientzle at FreeBSD.org
Tue Dec 29 06:34:23 UTC 2009


Author: kientzle
Date: Tue Dec 29 06:34:23 2009
New Revision: 201170
URL: http://svn.freebsd.org/changeset/base/201170

Log:
  Essentially eliminate problems with false hardlinks in POSIX cpio
  archives by generating synthetic ino values and mapping values
  from disk into the new values.

Modified:
  head/lib/libarchive/archive_write_set_format_cpio.c

Modified: head/lib/libarchive/archive_write_set_format_cpio.c
==============================================================================
--- head/lib/libarchive/archive_write_set_format_cpio.c	Tue Dec 29 06:27:19 2009	(r201169)
+++ head/lib/libarchive/archive_write_set_format_cpio.c	Tue Dec 29 06:34:23 2009	(r201170)
@@ -54,6 +54,12 @@ static int64_t	format_octal_recursive(in
 
 struct cpio {
 	uint64_t	  entry_bytes_remaining;
+
+	int64_t		  ino_next;	/* Most recently assigned synthetic ino. */
+
+	struct		 { int64_t old; int new;} *ino_list;	/* Map: on-disk ino -> synthetic ino. */
+	size_t		  ino_list_size;	/* Slots allocated in ino_list. */
+	size_t		  ino_list_next;	/* Slots in use; also the next free index. */
 };
 
 struct cpio_header {
@@ -103,35 +109,102 @@ archive_write_set_format_cpio(struct arc
 	return (ARCHIVE_OK);
 }
 
+/*
+ * Ino values are as long as 64 bits on some systems; cpio format
+ * only allows 18 bits and relies on the ino values to identify hardlinked
+ * files.  So, we can't merely "hash" the ino numbers since collisions
+ * would corrupt the archive.  Instead, we generate synthetic ino values
+ * to store in the archive and maintain a map of original ino values to
+ * synthetic ones so we can preserve hardlink information.
+ *
+ * Returns the synthetic ino, 0 if the entry has no ino, -1 if realloc fails.
+ *
+ * TODO: Make this more efficient.  It's not as bad as it looks (most files
+ * have no hardlinks and cost no work here), but we could easily do better.
+ * TODO: Work with dev/ino pairs here instead of just ino values.
+ */
+static int
+synthesize_ino_value(struct cpio *cpio, struct archive_entry *entry)
+{
+	int64_t ino = archive_entry_ino64(entry);
+	int ino_new;	/* NOTE(review): int, but cpio->ino_next is int64_t. */
+	size_t i;
+
+	/*
+	 * If no index number was given, don't assign one.  In
+	 * particular, this handles the end-of-archive marker
+	 * correctly by giving it a zero index value.  (This is also
+	 * why we start our synthetic index numbers with one below.)
+	 */
+	if (ino == 0)
+		return (0);
+
+	/* Fewer than two links: hand out a fresh value, record no mapping. */
+	if (archive_entry_nlink(entry) < 2) {
+		return ++cpio->ino_next;
+	}
+
+	/* Linear search for the old ino; if we have it, this is a
+	 * hardlink and we reuse the same synthetic value. */
+	for (i = 0; i < cpio->ino_list_next; ++i) {
+		if (cpio->ino_list[i].old == ino)
+			return (cpio->ino_list[i].new);
+	}
+
+	/* First time we see this ino: assign a new index number. */
+	ino_new = ++cpio->ino_next;
+
+	/* Ensure space for the new mapping: 512 slots at first, then double. */
+	if (cpio->ino_list_size <= cpio->ino_list_next) {
+		size_t newsize = cpio->ino_list_size < 512
+		    ? 512 : cpio->ino_list_size * 2;
+		void *newlist = realloc(cpio->ino_list,
+		    sizeof(cpio->ino_list[0]) * newsize);
+		if (newlist == NULL)
+			return (-1);	/* Old list stays valid; ino_next already bumped. */
+
+		cpio->ino_list_size = newsize;
+		cpio->ino_list = newlist;
+	}
+
+	/* Record the old -> new mapping and return the new value. */
+	cpio->ino_list[cpio->ino_list_next].old = ino;
+	cpio->ino_list[cpio->ino_list_next].new = ino_new;
+	++cpio->ino_list_next;
+	return (ino_new);
+}
+
 static int
 archive_write_cpio_header(struct archive_write *a, struct archive_entry *entry)
 {
 	struct cpio *cpio;
 	const char *p, *path;
-	int pathlength, ret;
+	int pathlength, ret, ret2;
+	int64_t	ino;
 	struct cpio_header	 h;
 
 	cpio = (struct cpio *)a->format_data;
-	ret = 0;
+	ret2 = ARCHIVE_OK;
 
 	path = archive_entry_pathname(entry);
-	pathlength = strlen(path) + 1; /* Include trailing null. */
+	pathlength = (int)strlen(path) + 1; /* Include trailing null. */
 
 	memset(&h, 0, sizeof(h));
 	format_octal(070707, &h.c_magic, sizeof(h.c_magic));
 	format_octal(archive_entry_dev(entry), &h.c_dev, sizeof(h.c_dev));
-	/*
-	 * TODO: Generate artificial inode numbers rather than just
-	 * re-using the ones off the disk.  That way, the 18-bit c_ino
-	 * field only limits the number of files in the archive.
-	 */
-	if ((int)archive_entry_ino(entry) > 0777777) {
+
+	ino = synthesize_ino_value(cpio, entry);
+	if (ino < 0) {
+		archive_set_error(&a->archive, ENOMEM,
+		    "No memory for ino translation table");
+		return (ARCHIVE_FATAL);
+	} else if (ino > 0777777) {
 		archive_set_error(&a->archive, ERANGE,
-		    "large inode number truncated");
-		ret = ARCHIVE_WARN;
+		    "Too many files for this cpio format");
+		return (ARCHIVE_FATAL);
 	}
+	format_octal(ino & 0777777, &h.c_ino, sizeof(h.c_ino));
 
-	format_octal(archive_entry_ino(entry) & 0777777, &h.c_ino, sizeof(h.c_ino));
 	format_octal(archive_entry_mode(entry), &h.c_mode, sizeof(h.c_mode));
 	format_octal(archive_entry_uid(entry), &h.c_uid, sizeof(h.c_uid));
 	format_octal(archive_entry_gid(entry), &h.c_gid, sizeof(h.c_gid));
@@ -170,6 +243,8 @@ archive_write_cpio_header(struct archive
 	if (p != NULL  &&  *p != '\0')
 		ret = (a->compressor.write)(a, p, strlen(p));
 
+	if (ret == ARCHIVE_OK)
+		ret = ret2;
 	return (ret);
 }
 
@@ -218,17 +293,15 @@ format_octal_recursive(int64_t v, char *
 		return (v);
 	v = format_octal_recursive(v, p+1, s-1);
 	*p = '0' + (v & 7);
-	return (v >>= 3);
+	return (v >> 3);
 }
 
 static int
 archive_write_cpio_finish(struct archive_write *a)
 {
-	struct cpio *cpio;
 	int er;
 	struct archive_entry *trailer;
 
-	cpio = (struct cpio *)a->format_data;
 	trailer = archive_entry_new();
 	/* nlink = 1 here for GNU cpio compat. */
 	archive_entry_set_nlink(trailer, 1);
@@ -244,6 +317,7 @@ archive_write_cpio_destroy(struct archiv
 	struct cpio *cpio;
 
 	cpio = (struct cpio *)a->format_data;
+	free(cpio->ino_list);
 	free(cpio);
 	a->format_data = NULL;
 	return (ARCHIVE_OK);
@@ -253,7 +327,8 @@ static int
 archive_write_cpio_finish_entry(struct archive_write *a)
 {
 	struct cpio *cpio;
-	int to_write, ret;
+	size_t to_write;
+	int ret;
 
 	cpio = (struct cpio *)a->format_data;
 	ret = ARCHIVE_OK;


More information about the svn-src-head mailing list