svn commit: r313572 - head/contrib/libarchive/libarchive

Martin Matuska mm at FreeBSD.org
Sat Feb 11 01:01:00 UTC 2017


Author: mm
Date: Sat Feb 11 01:00:58 2017
New Revision: 313572
URL: https://svnweb.freebsd.org/changeset/base/313572

Log:
  MFV r313569:313569:313569:
  Sync libarchive with vendor
  
  Vendor bugfixes:
  cpio reader sanity fix (OSS-Fuzz 504)
  WARC reader sanity fixes (OSS-Fuzz 511, 526, 532, 552)
  mtree reader time parsing fix (OSS-Fuzz 538)
  XAR reader memleak fix (OSS-Fuzz 551)
  
  MFC after:	1 week

Modified:
  head/contrib/libarchive/libarchive/archive_read_support_format_cpio.c
  head/contrib/libarchive/libarchive/archive_read_support_format_mtree.c
  head/contrib/libarchive/libarchive/archive_read_support_format_warc.c
  head/contrib/libarchive/libarchive/archive_read_support_format_xar.c
Directory Properties:
  head/contrib/libarchive/   (props changed)

Modified: head/contrib/libarchive/libarchive/archive_read_support_format_cpio.c
==============================================================================
--- head/contrib/libarchive/libarchive/archive_read_support_format_cpio.c	Sat Feb 11 00:56:18 2017	(r313571)
+++ head/contrib/libarchive/libarchive/archive_read_support_format_cpio.c	Sat Feb 11 01:00:58 2017	(r313572)
@@ -356,7 +356,7 @@ archive_read_format_cpio_read_header(str
     struct archive_entry *entry)
 {
 	struct cpio *cpio;
-	const void *h;
+	const void *h, *hl;
 	struct archive_string_conv *sconv;
 	size_t namelength;
 	size_t name_pad;
@@ -406,11 +406,11 @@ archive_read_format_cpio_read_header(str
 			    "Rejecting malformed cpio archive: symlink contents exceed 1 megabyte");
 			return (ARCHIVE_FATAL);
 		}
-		h = __archive_read_ahead(a,
+		hl = __archive_read_ahead(a,
 			(size_t)cpio->entry_bytes_remaining, NULL);
-		if (h == NULL)
+		if (hl == NULL)
 			return (ARCHIVE_FATAL);
-		if (archive_entry_copy_symlink_l(entry, (const char *)h,
+		if (archive_entry_copy_symlink_l(entry, (const char *)hl,
 		    (size_t)cpio->entry_bytes_remaining, sconv) != 0) {
 			if (errno == ENOMEM) {
 				archive_set_error(&a->archive, ENOMEM,
@@ -434,7 +434,7 @@ archive_read_format_cpio_read_header(str
 	 * header.  XXX */
 
 	/* Compare name to "TRAILER!!!" to test for end-of-archive. */
-	if (namelength == 11 && memcmp((const char *)h, "TRAILER!!!",
+	if (namelength == 11 && strncmp((const char *)h, "TRAILER!!!",
 	    11) == 0) {
 		/* TODO: Store file location of start of block. */
 		archive_clear_error(&a->archive);

Modified: head/contrib/libarchive/libarchive/archive_read_support_format_mtree.c
==============================================================================
--- head/contrib/libarchive/libarchive/archive_read_support_format_mtree.c	Sat Feb 11 00:56:18 2017	(r313571)
+++ head/contrib/libarchive/libarchive/archive_read_support_format_mtree.c	Sat Feb 11 01:00:58 2017	(r313572)
@@ -1608,8 +1608,11 @@ parse_keyword(struct archive_read *a, st
 			if (*val == '.') {
 				++val;
 				ns = (long)mtree_atol10(&val);
-			} else
-				ns = 0;
+				if (ns < 0)
+					ns = 0;
+				else if (ns > 999999999)
+					ns = 999999999;
+			}
 			if (m > my_time_t_max)
 				m = my_time_t_max;
 			else if (m < my_time_t_min)

Modified: head/contrib/libarchive/libarchive/archive_read_support_format_warc.c
==============================================================================
--- head/contrib/libarchive/libarchive/archive_read_support_format_warc.c	Sat Feb 11 00:56:18 2017	(r313571)
+++ head/contrib/libarchive/libarchive/archive_read_support_format_warc.c	Sat Feb 11 01:00:58 2017	(r313572)
@@ -134,8 +134,8 @@ static ssize_t _warc_rdlen(const char *b
 static time_t _warc_rdrtm(const char *buf, size_t bsz);
 static time_t _warc_rdmtm(const char *buf, size_t bsz);
 static const char *_warc_find_eoh(const char *buf, size_t bsz);
+static const char *_warc_find_eol(const char *buf, size_t bsz);
 
-

 int
 archive_read_support_format_warc(struct archive *_a)
 {
@@ -198,8 +198,8 @@ _warc_bid(struct archive_read *a, int be
 
 	/* otherwise snarf the record's version number */
 	ver = _warc_rdver(hdr, nrd);
-	if (ver == 0U || ver > 10000U) {
-		/* oh oh oh, best not to wager ... */
+	if (ver < 1200U || ver > 10000U) {
+		/* we only support WARC 0.12 to 1.0 */
 		return -1;
 	}
 
@@ -254,23 +254,32 @@ start_over:
 			&a->archive, ARCHIVE_ERRNO_MISC,
 			"Bad record header");
 		return (ARCHIVE_FATAL);
-	} else if ((ver = _warc_rdver(buf, eoh - buf)) > 10000U) {
-		/* nawww, I wish they promised backward compatibility
-		 * anyhoo, in their infinite wisdom the 28500 guys might
-		 * come up with something we can't possibly handle so
-		 * best end things here */
+	}
+	ver = _warc_rdver(buf, eoh - buf);
+	/* we currently support WARC 0.12 to 1.0 */
+	if (ver == 0U) {
 		archive_set_error(
 			&a->archive, ARCHIVE_ERRNO_MISC,
-			"Unsupported record version");
+			"Invalid record version");
 		return (ARCHIVE_FATAL);
-	} else if ((cntlen = _warc_rdlen(buf, eoh - buf)) < 0) {
+	} else if (ver < 1200U || ver > 10000U) {
+		archive_set_error(
+			&a->archive, ARCHIVE_ERRNO_MISC,
+			"Unsupported record version: %u.%u",
+			ver / 10000, (ver % 10000) / 100);
+		return (ARCHIVE_FATAL);
+	}
+	cntlen = _warc_rdlen(buf, eoh - buf);
+	if (cntlen < 0) {
 		/* nightmare!  the specs say content-length is mandatory
 		 * so I don't feel overly bad stopping the reader here */
 		archive_set_error(
 			&a->archive, EINVAL,
 			"Bad content length");
 		return (ARCHIVE_FATAL);
-	} else if ((rtime = _warc_rdrtm(buf, eoh - buf)) == (time_t)-1) {
+	}
+	rtime = _warc_rdrtm(buf, eoh - buf);
+	if (rtime == (time_t)-1) {
 		/* record time is mandatory as per WARC/1.0,
 		 * so just barf here, fast and loud */
 		archive_set_error(
@@ -284,7 +293,7 @@ start_over:
 	if (ver != w->pver) {
 		/* stringify this entry's version */
 		archive_string_sprintf(&w->sver,
-			"WARC/%u.%u", ver / 10000, ver % 10000);
+			"WARC/%u.%u", ver / 10000, (ver % 10000) / 100);
 		/* remember the version */
 		w->pver = ver;
 	}
@@ -577,51 +586,41 @@ out:
 }
 
 static unsigned int
-_warc_rdver(const char buf[10], size_t bsz)
+_warc_rdver(const char *buf, size_t bsz)
 {
 	static const char magic[] = "WARC/";
-	unsigned int ver;
-
-	(void)bsz; /* UNUSED */
+	unsigned int ver = 0U;
+	unsigned int end = 0U;
 
-	if (memcmp(buf, magic, sizeof(magic) - 1U) != 0) {
-		/* nope */
-		return 99999U;
+	if (bsz < 12 || memcmp(buf, magic, sizeof(magic) - 1U) != 0) {
+		/* buffer too small or invalid magic */
+		return ver;
 	}
 	/* looks good so far, read the version number for a laugh */
 	buf += sizeof(magic) - 1U;
-	/* most common case gets a quick-check here */
-	if (memcmp(buf, "1.0\r\n", 5U) == 0) {
-		ver = 10000U;
-	} else {
-		switch (*buf) {
-		case '0':
-		case '1':
-		case '2':
-		case '3':
-		case '4':
-		case '5':
-		case '6':
-		case '7':
-		case '8':
-			if (buf[1U] == '.') {
-				char *on;
-
-				/* set up major version */
-				ver = (buf[0U] - '0') * 10000U;
-				/* minor version, anyone? */
-				ver += (strtol(buf + 2U, &on, 10)) * 100U;
-				/* don't parse anything else */
-				if (on > buf + 2U) {
-					break;
-				}
-			}
-			/* FALLTHROUGH */
-		case '9':
-		default:
-			/* just make the version ridiculously high */
-			ver = 999999U;
-			break;
+
+	if (isdigit(buf[0U]) && (buf[1U] == '.') && isdigit(buf[2U])) {
+		/* we support a maximum of 2 digits in the minor version */
+		if (isdigit(buf[3U]))
+			end = 1U;
+		/* set up major version */
+		ver = (buf[0U] - '0') * 10000U;
+		/* set up minor version */
+		if (end == 1U) {
+			ver += (buf[2U] - '0') * 1000U;
+			ver += (buf[3U] - '0') * 100U;
+		} else
+			ver += (buf[2U] - '0') * 100U;
+		/*
+		 * WARC below version 0.12 has a space-separated header
+		 * WARC 0.12 and above terminates the version with a CRLF
+		 */
+		if (ver >= 1200U) {
+			if (memcmp(buf + 3U + end, "\r\n", 2U) != 0)
+				ver = 0U;
+		} else if (ver < 1200U) {
+			if (!isblank(*(buf + 3U + end)))
+				ver = 0U;
 		}
 	}
 	return ver;
@@ -631,34 +630,27 @@ static unsigned int
 _warc_rdtyp(const char *buf, size_t bsz)
 {
 	static const char _key[] = "\r\nWARC-Type:";
-	const char *const eob = buf + bsz;
-	const char *val;
+	const char *val, *eol;
 
 	if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
 		/* no bother */
 		return WT_NONE;
 	}
-	/* overread whitespace */
 	val += sizeof(_key) - 1U;
-	while (val < eob && isspace((unsigned char)*val))
+	if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) {
+		/* no end of line */
+		return WT_NONE;
+	}
+
+	/* overread whitespace */
+	while (val < eol && isblank((unsigned char)*val))
 		++val;
 
-	if (val + 8U > eob) {
-		;
-	} else if (memcmp(val, "resource", 8U) == 0) {
-		return WT_RSRC;
-	} else if (memcmp(val, "warcinfo", 8U) == 0) {
-		return WT_INFO;
-	} else if (memcmp(val, "metadata", 8U) == 0) {
-		return WT_META;
-	} else if (memcmp(val, "request", 7U) == 0) {
-		return WT_REQ;
-	} else if (memcmp(val, "response", 8U) == 0) {
-		return WT_RSP;
-	} else if (memcmp(val, "conversi", 8U) == 0) {
-		return WT_CONV;
-	} else if (memcmp(val, "continua", 8U) == 0) {
-		return WT_CONT;
+	if (val + 8U == eol) {
+		if (memcmp(val, "resource", 8U) == 0)
+			return WT_RSRC;
+		else if (memcmp(val, "response", 8U) == 0)
+			return WT_RSP;
 	}
 	return WT_NONE;
 }
@@ -667,10 +659,7 @@ static warc_string_t
 _warc_rduri(const char *buf, size_t bsz)
 {
 	static const char _key[] = "\r\nWARC-Target-URI:";
-	const char *const eob = buf + bsz;
-	const char *val;
-	const char *uri;
-	const char *eol;
+	const char *val, *uri, *eol, *p;
 	warc_string_t res = {0U, NULL};
 
 	if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
@@ -679,25 +668,32 @@ _warc_rduri(const char *buf, size_t bsz)
 	}
 	/* overread whitespace */
 	val += sizeof(_key) - 1U;
-	while (val < eob && isspace((unsigned char)*val))
+	if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) {
+		/* no end of line */
+		return res;
+	}
+
+	while (val < eol && isblank((unsigned char)*val))
 		++val;
 
 	/* overread URL designators */
-	if ((uri = xmemmem(val, eob - val, "://", 3U)) == NULL) {
+	if ((uri = xmemmem(val, eol - val, "://", 3U)) == NULL) {
 		/* not touching that! */
 		return res;
-	} else if ((eol = memchr(uri, '\n', eob - uri)) == NULL) {
-		/* no end of line? :O */
-		return res;
 	}
 
-	/* massage uri to point to after :// */
+	/* spaces inside uri are not allowed, CRLF should follow */
+	for (p = val; p < eol; p++) {
+		if (isspace(*p))
+			return res;
+	}
+
+	/* there must be at least space for ftp */
+	if (uri < (val + 3U))
+		return res;
+
+	/* move uri to point to after :// */
 	uri += 3U;
-	/* also massage eol to point to the first whitespace
-	 * after the last non-whitespace character before
-	 * the end of the line */
-	while (eol > uri && isspace((unsigned char)eol[-1]))
-		--eol;
 
 	/* now then, inspect the URI */
 	if (memcmp(val, "file", 4U) == 0) {
@@ -720,7 +716,7 @@ static ssize_t
 _warc_rdlen(const char *buf, size_t bsz)
 {
 	static const char _key[] = "\r\nContent-Length:";
-	const char *val;
+	const char *val, *eol;
 	char *on = NULL;
 	long int len;
 
@@ -728,14 +724,24 @@ _warc_rdlen(const char *buf, size_t bsz)
 		/* no bother */
 		return -1;
 	}
-
-	/* strtol kindly overreads whitespace for us, so use that */
 	val += sizeof(_key) - 1U;
+	if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) {
+		/* no end of line */
+		return -1;
+	}
+
+	/* skip leading whitespace */
+	while (val < eol && isblank(*val))
+		val++;
+	/* there must be at least one digit */
+	if (!isdigit(*val))
+		return -1;
 	len = strtol(val, &on, 10);
-	if (on == NULL || !isspace((unsigned char)*on)) {
-		/* hm, can we trust that number?  Best not. */
+	if (on != eol) {
+		/* line must end here */
 		return -1;
 	}
+
 	return (size_t)len;
 }
 
@@ -743,7 +749,7 @@ static time_t
 _warc_rdrtm(const char *buf, size_t bsz)
 {
 	static const char _key[] = "\r\nWARC-Date:";
-	const char *val;
+	const char *val, *eol;
 	char *on = NULL;
 	time_t res;
 
@@ -751,13 +757,17 @@ _warc_rdrtm(const char *buf, size_t bsz)
 		/* no bother */
 		return (time_t)-1;
 	}
+	val += sizeof(_key) - 1U;
+	if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL ) {
+		/* no end of line */
+		return -1;
+	}
 
 	/* xstrpisotime() kindly overreads whitespace for us, so use that */
-	val += sizeof(_key) - 1U;
 	res = xstrpisotime(val, &on);
-	if (on == NULL || !isspace((unsigned char)*on)) {
-		/* hm, can we trust that number?  Best not. */
-		return (time_t)-1;
+	if (on != eol) {
+		/* line must end here */
+		return -1;
 	}
 	return res;
 }
@@ -766,7 +776,7 @@ static time_t
 _warc_rdmtm(const char *buf, size_t bsz)
 {
 	static const char _key[] = "\r\nLast-Modified:";
-	const char *val;
+	const char *val, *eol;
 	char *on = NULL;
 	time_t res;
 
@@ -774,13 +784,17 @@ _warc_rdmtm(const char *buf, size_t bsz)
 		/* no bother */
 		return (time_t)-1;
 	}
+	val += sizeof(_key) - 1U;
+	if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL ) {
+		/* no end of line */
+		return -1;
+	}
 
 	/* xstrpisotime() kindly overreads whitespace for us, so use that */
-	val += sizeof(_key) - 1U;
 	res = xstrpisotime(val, &on);
-	if (on == NULL || !isspace((unsigned char)*on)) {
-		/* hm, can we trust that number?  Best not. */
-		return (time_t)-1;
+	if (on != eol) {
+		/* line must end here */
+		return -1;
 	}
 	return res;
 }
@@ -797,4 +811,12 @@ _warc_find_eoh(const char *buf, size_t b
 	return hit;
 }
 
+static const char*
+_warc_find_eol(const char *buf, size_t bsz)
+{
+	static const char _marker[] = "\r\n";
+	const char *hit = xmemmem(buf, bsz, _marker, sizeof(_marker) - 1U);
+
+	return hit;
+}
 /* archive_read_support_format_warc.c ends here */

Modified: head/contrib/libarchive/libarchive/archive_read_support_format_xar.c
==============================================================================
--- head/contrib/libarchive/libarchive/archive_read_support_format_xar.c	Sat Feb 11 00:56:18 2017	(r313571)
+++ head/contrib/libarchive/libarchive/archive_read_support_format_xar.c	Sat Feb 11 01:00:58 2017	(r313572)
@@ -394,6 +394,7 @@ static void	checksum_update(struct archi
 		    size_t, const void *, size_t);
 static int	checksum_final(struct archive_read *, const void *,
 		    size_t, const void *, size_t);
+static void	checksum_cleanup(struct archive_read *);
 static int	decompression_init(struct archive_read *, enum enctype);
 static int	decompress(struct archive_read *, const void **,
 		    size_t *, const void *, size_t *);
@@ -923,6 +924,7 @@ xar_cleanup(struct archive_read *a)
 	int r;
 
 	xar = (struct xar *)(a->format->data);
+	checksum_cleanup(a);
 	r = decompression_cleanup(a);
 	hdlink = xar->hdlink_list;
 	while (hdlink != NULL) {
@@ -1720,6 +1722,16 @@ decompression_cleanup(struct archive_rea
 }
 
 static void
+checksum_cleanup(struct archive_read *a) {
+	struct xar *xar;
+
+	xar = (struct xar *)(a->format->data);
+
+	_checksum_final(&(xar->a_sumwrk), NULL, 0);
+	_checksum_final(&(xar->e_sumwrk), NULL, 0);
+}
+
+static void
 xmlattr_cleanup(struct xmlattr_list *list)
 {
 	struct xmlattr *attr, *next;


More information about the svn-src-head mailing list