svn commit: r343250 - stable/12/usr.bin/gzip

Xin LI delphij at FreeBSD.org
Mon Jan 21 06:14:28 UTC 2019


Author: delphij
Date: Mon Jan 21 06:14:26 2019
New Revision: 343250
URL: https://svnweb.freebsd.org/changeset/base/343250

Log:
  MFC r342845,342846: Port NetBSD improvements:
  
   - Add -l support for xz files
   - Add lzip support to gzip based on the example lzip decoder.
  
  Obtained from:	NetBSD
  Relnotes:	yes

Added:
  stable/12/usr.bin/gzip/unlz.c
     - copied unchanged from r342845, head/usr.bin/gzip/unlz.c
Modified:
  stable/12/usr.bin/gzip/gzip.1
  stable/12/usr.bin/gzip/gzip.c
  stable/12/usr.bin/gzip/unxz.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/usr.bin/gzip/gzip.1
==============================================================================
--- stable/12/usr.bin/gzip/gzip.1	Mon Jan 21 04:50:56 2019	(r343249)
+++ stable/12/usr.bin/gzip/gzip.1	Mon Jan 21 06:14:26 2019	(r343250)
@@ -1,4 +1,4 @@
-.\"	$NetBSD: gzip.1,v 1.30 2017/10/22 17:36:49 abhinav Exp $
+.\"	$NetBSD: gzip.1,v 1.31 2018/10/26 22:10:15 christos Exp $
 .\"
 .\" Copyright (c) 1997, 2003, 2004, 2008, 2009, 2015, 2017 Matthew R. Green
 .\" All rights reserved.
@@ -25,7 +25,7 @@
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
-.Dd November 21, 2017
+.Dd January 7, 2019
 .Dt GZIP 1
 .Os
 .Sh NAME
@@ -109,6 +109,7 @@ This version of
 is also capable of decompressing files compressed using
 .Xr compress 1 ,
 .Xr bzip2 1 ,
+.Ar lzip ,
 or
 .Xr xz 1 .
 .Sh OPTIONS
@@ -224,7 +225,7 @@ This implementation of
 was ported based on the
 .Nx
 .Nm
-version 20170803,
+version 20181111,
 and first appeared in
 .Fx 7.0 .
 .Sh AUTHORS

Modified: stable/12/usr.bin/gzip/gzip.c
==============================================================================
--- stable/12/usr.bin/gzip/gzip.c	Mon Jan 21 04:50:56 2019	(r343249)
+++ stable/12/usr.bin/gzip/gzip.c	Mon Jan 21 06:14:26 2019	(r343250)
@@ -1,4 +1,4 @@
-/*	$NetBSD: gzip.c,v 1.113 2018/06/12 00:42:17 kamil Exp $	*/
+/*	$NetBSD: gzip.c,v 1.116 2018/10/27 11:39:12 skrll Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
@@ -84,6 +84,9 @@ enum filetype {
 #ifndef NO_XZ_SUPPORT
 	FT_XZ,
 #endif
+#ifndef NO_LZ_SUPPORT
+	FT_LZ,
+#endif
 	FT_LAST,
 	FT_UNKNOWN
 };
@@ -110,6 +113,11 @@ enum filetype {
 #define XZ_MAGIC	"\3757zXZ"
 #endif
 
+#ifndef NO_LZ_SUPPORT
+#define LZ_SUFFIX	".lz"
+#define LZ_MAGIC	"LZIP"
+#endif
+
 #define GZ_SUFFIX	".gz"
 
 #define BUFLEN		(64 * 1024)
@@ -155,6 +163,9 @@ static suffixes_t suffixes[] = {
 #ifndef NO_XZ_SUPPORT
 	SUFFIX(XZ_SUFFIX,	""),
 #endif
+#ifndef NO_LZ_SUPPORT
+	SUFFIX(LZ_SUFFIX,	""),
+#endif
 	SUFFIX(GZ_SUFFIX,	""),	/* Overwritten by -S "" */
 #endif /* SMALL */
 #undef SUFFIX
@@ -162,7 +173,7 @@ static suffixes_t suffixes[] = {
 #define NUM_SUFFIXES (nitems(suffixes))
 #define SUFFIX_MAXLEN	30
 
-static	const char	gzip_version[] = "FreeBSD gzip 20171121";
+static	const char	gzip_version[] = "FreeBSD gzip 20190107";
 
 #ifndef SMALL
 static	const char	gzip_copyright[] = \
@@ -246,6 +257,7 @@ static	void	display_license(void);
 static	const suffixes_t *check_suffix(char *, int);
 static	ssize_t	read_retry(int, void *, size_t);
 static	ssize_t	write_retry(int, const void *, size_t);
+static void	print_list_out(off_t, off_t, const char*);
 
 #ifdef SMALL
 #define infile_set(f,t) infile_set(f)
@@ -289,8 +301,13 @@ static	off_t	unpack(int, int, char *, size_t, off_t *)
 
 #ifndef NO_XZ_SUPPORT
 static	off_t	unxz(int, int, char *, size_t, off_t *);
+static	off_t	unxz_len(int);
 #endif
 
+#ifndef NO_LZ_SUPPORT
+static	off_t	unlz(int, int, char *, size_t, off_t *);
+#endif
+
 #ifdef SMALL
 #define getopt_long(a,b,c,d,e) getopt(a,b,c)
 #else
@@ -1159,6 +1176,11 @@ file_gettype(u_char *buf)
 		return FT_XZ;
 	else
 #endif
+#ifndef NO_LZ_SUPPORT
+	if (memcmp(buf, LZ_MAGIC, 4) == 0)
+		return FT_LZ;
+	else
+#endif
 		return FT_UNKNOWN;
 }
 
@@ -1632,14 +1654,23 @@ file_uncompress(char *file, char *outfile, size_t outs
 #ifndef NO_XZ_SUPPORT
 	case FT_XZ:
 		if (lflag) {
-			maybe_warnx("no -l with xz files");
-			goto lose;
+			size = unxz_len(fd);
+			print_list_out(in_size, size, file);
+			return -1;
 		}
-
 		size = unxz(fd, zfd, NULL, 0, NULL);
 		break;
 #endif
 
+#ifndef NO_LZ_SUPPORT
+	case FT_LZ:
+		if (lflag) {
+			maybe_warnx("no -l with lzip files");
+			goto lose;
+		}
+		size = unlz(fd, zfd, NULL, 0, NULL);
+		break;
+#endif
 #ifndef SMALL
 	case FT_UNKNOWN:
 		if (lflag) {
@@ -1872,6 +1903,12 @@ handle_stdin(void)
 			     (char *)header1, sizeof header1, &gsize);
 		break;
 #endif
+#ifndef NO_LZ_SUPPORT
+	case FT_LZ:
+		usize = unlz(STDIN_FILENO, STDOUT_FILENO,
+			     (char *)header1, sizeof header1, &gsize);
+		break;
+#endif
 	}
 
 #ifndef SMALL
@@ -2197,6 +2234,12 @@ print_list(int fd, off_t out, const char *outfile, tim
 #else
 	(void)&ts;	/* XXX */
 #endif
+	print_list_out(out, in, outfile);
+}
+
+static void
+print_list_out(off_t out, off_t in, const char *outfile)
+{
 	printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in);
 	print_ratio(in, out, stdout);
 	printf(" %s\n", outfile);
@@ -2270,6 +2313,9 @@ display_version(void)
 #endif
 #ifndef NO_XZ_SUPPORT
 #include "unxz.c"
+#endif
+#ifndef NO_LZ_SUPPORT
+#include "unlz.c"
 #endif
 
 static ssize_t

Copied: stable/12/usr.bin/gzip/unlz.c (from r342845, head/usr.bin/gzip/unlz.c)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/12/usr.bin/gzip/unlz.c	Mon Jan 21 06:14:26 2019	(r343250, copy of r342845, head/usr.bin/gzip/unlz.c)
@@ -0,0 +1,646 @@
+/*	$NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $	*/
+
+/*-
+ * Copyright (c) 2018 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*  Lzd - Educational decompressor for the lzip format
+    Copyright (C) 2013-2018 Antonio Diaz Diaz.
+
+    This program is free software. Redistribution and use in source and
+    binary forms, with or without modification, are permitted provided
+    that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+
+#define LZ_STATES		12
+
+#define LITERAL_CONTEXT_BITS	3
+#define POS_STATE_BITS		2
+#define POS_STATES		(1 << POS_STATE_BITS)
+#define POS_STATE_MASK 		(POS_STATES - 1)
+
+#define STATES			4
+#define DIS_SLOT_BITS		6
+
+#define DIS_MODEL_START		4
+#define DIS_MODEL_END		14
+
+#define MODELED_DISTANCES	(1 << (DIS_MODEL_END / 2))
+#define DIS_ALIGN_BITS		4
+#define DIS_ALIGN_SIZE		(1 << DIS_ALIGN_BITS)
+
+#define LOW_BITS		3
+#define MID_BITS		3
+#define HIGH_BITS		8
+
+#define LOW_SYMBOLS		(1 << LOW_BITS)
+#define MID_SYMBOLS		(1 << MID_BITS)
+#define HIGH_SYMBOLS		(1 << HIGH_BITS)
+
+#define MAX_SYMBOLS 		(LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
+
+#define MIN_MATCH_LEN		2
+
+#define BIT_MODEL_MOVE_BITS	5
+#define BIT_MODEL_TOTAL_BITS 	11
+#define BIT_MODEL_TOTAL 	(1 << BIT_MODEL_TOTAL_BITS)
+#define BIT_MODEL_INIT		(BIT_MODEL_TOTAL / 2)
+
+static const int lz_st_next[] = {	/* one entry per state (LZ_STATES == 12); read by lz_st_get_char() */
+	0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
+};
+
+/*
+ * Tell whether `st' lies in the range 0..6, i.e. the range of values that
+ * lz_st_get_char() produces (the other transitions yield 7..11).
+ */
+static bool
+lz_st_is_char(int st)
+{
+	const bool beyond_char_states = (st >= 7);
+
+	return !beyond_char_states;
+}
+
+/*
+ * State transition taken after a literal: look the successor of `st' up
+ * in lz_st_next[].  `st' is used as an index without a range check.
+ */
+static int
+lz_st_get_char(int st)
+{
+	const int successor = lz_st_next[st];
+
+	return successor;
+}
+
+/*
+ * State transition taken after a match: 7 when coming from a state
+ * below 7, otherwise 10.
+ */
+static int
+lz_st_get_match(int st)
+{
+	if (st < 7)
+		return 7;
+	return 10;
+}
+
+/*
+ * State transition taken after a repeated match: 8 when coming from a
+ * state below 7, otherwise 11.
+ */
+static int
+lz_st_get_rep(int st)
+{
+	if (st < 7)
+		return 8;
+	return 11;
+}
+
+/*
+ * State transition taken after a one-byte repeated match: 9 when coming
+ * from a state below 7, otherwise 11.
+ */
+static int
+lz_st_get_short_rep(int st)
+{
+	if (st < 7)
+		return 9;
+	return 11;
+}
+
+struct lz_len_model {
+	int choice1;	/* bit model; a 0 bit makes lz_rd_decode_len() use bm_low */
+	int choice2;	/* bit model; a 0 bit selects bm_mid, a 1 bit bm_high */
+	int bm_low[POS_STATES][LOW_SYMBOLS];	/* one LOW_BITS-deep tree per pos_state */
+	int bm_mid[POS_STATES][MID_SYMBOLS];	/* one MID_BITS-deep tree per pos_state */
+	int bm_high[HIGH_SYMBOLS];	/* single HIGH_BITS-deep tree */
+};
+
+static uint32_t lz_crc[256];
+
+/*
+ * Build the 256-entry byte-wise lookup table in lz_crc[] from the
+ * constant 0xEDB88320, shifting each remainder right one bit at a time.
+ * lz_crc_update() consumes the table.
+ */
+static void
+lz_crc_init(void)
+{
+	for (unsigned byte = 0; byte < nitems(lz_crc); byte++) {
+		unsigned rem = byte;
+
+		for (unsigned left = 8; left > 0; left--)
+			rem = (rem & 1) ? (0xEDB88320U ^ (rem >> 1)) : (rem >> 1);
+		lz_crc[byte] = rem;
+	}
+}
+
+/*
+ * Fold `len' bytes starting at `buf' into the running checksum `*crc',
+ * one table lookup in lz_crc[] per byte.
+ */
+static void
+lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
+{
+	const uint8_t *p = buf;
+	size_t left = len;
+
+	while (left-- > 0) {
+		const uint32_t cur = *crc;
+
+		*crc = lz_crc[(cur ^ *p++) & 0xFF] ^ (cur >> 8);
+	}
+}
+
+struct lz_range_decoder {
+	FILE *fp;	/* compressed input; bytes are pulled with getc() */
+	uint32_t code;	/* current code value; primed with 5 input bytes by lz_rd_create() */
+	uint32_t range;	/* current range; refilled a byte at a time once it drops to 0x00FFFFFF or below */
+};
+
+/*
+ * Attach the range decoder to `fp' and prime it: the range starts at all
+ * ones and the code register is loaded from the next five input bytes.
+ * Returns -1 if the stream's error indicator is set afterwards, else 0.
+ */
+static int
+lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
+{
+	int remaining = 5;
+
+	rd->fp = fp;
+	rd->range = 0xFFFFFFFFU;
+	rd->code = 0;
+	while (remaining-- > 0) {
+		const uint8_t next = (uint8_t)getc(rd->fp);
+
+		rd->code = (rd->code << 8) | next;
+	}
+	if (ferror(rd->fp))
+		return -1;
+	return 0;
+}
+
+/*
+ * Decode `num_bits' bits without a probability model (the range is simply
+ * halved for each bit) and return them most significant bit first.
+ */
+static unsigned
+lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
+{
+	unsigned value = 0;
+	int left = num_bits;
+
+	while (left > 0) {
+		unsigned bit = 0;
+
+		rd->range >>= 1;
+		if (rd->code >= rd->range) {
+			rd->code -= rd->range;
+			bit = 1;
+		}
+		value = (value << 1) | bit;
+
+		/* Refill one byte once the range has lost its top byte. */
+		if (rd->range <= 0x00FFFFFFU) {
+			rd->range <<= 8;
+			rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
+		}
+		left--;
+	}
+
+	return value;
+}
+
+/*
+ * Decode one bit using the adaptive bit model `*bm' and move the model
+ * towards the value just seen.  Returns the decoded bit (0 or 1).
+ */
+static unsigned
+lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
+{
+	const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
+	const bool is_one = !(rd->code < bound);
+
+	if (is_one) {
+		rd->range -= bound;
+		rd->code -= bound;
+		*bm -= *bm >> BIT_MODEL_MOVE_BITS;
+	} else {
+		rd->range = bound;
+		*bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
+	}
+
+	/* Refill one byte once the range has lost its top byte. */
+	if (rd->range <= 0x00FFFFFFU) {
+		rd->range <<= 8;
+		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
+	}
+	return is_one ? 1 : 0;
+}
+
+/*
+ * Walk a binary tree of bit models rooted at bm[1], decoding `num_bits'
+ * bits, and return the resulting symbol in the range 0 .. 2^num_bits - 1.
+ */
+static unsigned
+lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
+{
+	unsigned node = 1;
+	int left = num_bits;
+
+	while (left-- > 0) {
+		const unsigned bit = lz_rd_decode_bit(rd, &bm[node]);
+
+		node = (node << 1) | bit;
+	}
+
+	/* Drop the leading 1 that marked the root of the tree. */
+	return node - (1 << num_bits);
+}
+
+/*
+ * Decode a `num_bits'-wide symbol with lz_rd_decode_tree() and return it
+ * with the order of those bits reversed.
+ */
+static unsigned
+lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
+{
+	unsigned forward = lz_rd_decode_tree(rd, bm, num_bits);
+	unsigned mirrored = 0;
+
+	for (int left = num_bits; left > 0; left--, forward >>= 1)
+		mirrored = (mirrored << 1) | (forward & 1);
+
+	return mirrored;
+}
+
+/*
+ * Decode a literal byte while steering the model choice by the bits of
+ * `match_byte'.  As long as the decoded bits agree with match_byte the
+ * models at bm[0x100 + (match_bit << 8) + symbol] are used; after the
+ * first disagreement the remaining bits come from the plain tree bm[symbol].
+ */
+static unsigned
+lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
+{
+	unsigned symbol = 1;
+	int shift = 7;
+
+	while (shift >= 0) {
+		const unsigned match_bit = (match_byte >> shift) & 1;
+		int *model = &bm[symbol + (match_bit << 8) + 0x100];
+		const unsigned bit = lz_rd_decode_bit(rd, model);
+
+		symbol = (symbol << 1) | bit;
+		if (bit != match_bit) {
+			/* Mismatch: finish the byte with the unmatched models. */
+			while (symbol < 0x100) {
+				const unsigned rest = lz_rd_decode_bit(rd, &bm[symbol]);
+
+				symbol = (symbol << 1) | rest;
+			}
+			break;
+		}
+		shift--;
+	}
+	return symbol & 0xFF;
+}
+
+/*
+ * Decode a length value with the length model `lm'.  Two choice bits pick
+ * the low, mid or high tree; the result is the tree symbol plus the number
+ * of symbols covered by the trees that were skipped.
+ */
+static unsigned
+lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
+    int pos_state)
+{
+	unsigned offset;
+	int *tree;
+	int bits;
+
+	if (lz_rd_decode_bit(rd, &lm->choice1) == 0) {
+		offset = 0;
+		tree = lm->bm_low[pos_state];
+		bits = LOW_BITS;
+	} else if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
+		offset = LOW_SYMBOLS;
+		tree = lm->bm_mid[pos_state];
+		bits = MID_BITS;
+	} else {
+		offset = LOW_SYMBOLS + MID_SYMBOLS;
+		tree = lm->bm_high;
+		bits = HIGH_BITS;
+	}
+
+	return offset + lz_rd_decode_tree(rd, tree, bits);
+}
+
+struct lz_decoder {
+	FILE *fin, *fout;	/* streams opened by lz_create() on dup()'d descriptors */
+	off_t pos, ppos, spos, dict_size;	/* pos: next write index in obuf; ppos: bytes accounted for before the current pass over obuf; spos: first unflushed index; dict_size: size of obuf */
+	bool wrapped;	/* set by lz_flush() when pos had reached dict_size */
+	uint32_t crc;	/* running checksum of flushed data; starts at ~0, see lz_get_crc() */
+	uint8_t *obuf;	/* output window, dict_size bytes, malloc()'d in lz_create() */
+	struct lz_range_decoder rdec;	/* range decoder reading from fin */
+};
+
+/*
+ * Write the not-yet-flushed part of the window (obuf[spos .. pos)) to the
+ * output stream and fold it into the running CRC.  When the window is
+ * full, restart at index 0 and account for the bytes in ppos.
+ *
+ * Returns 0 on success, -1 if there was nothing to flush or fwrite()
+ * came up short.
+ */
+static int
+lz_flush(struct lz_decoder *lz)
+{
+	const off_t pending = lz->pos - lz->spos;
+
+	if (pending <= 0)
+		return -1;
+
+	const uint8_t *chunk = lz->obuf + lz->spos;
+	const size_t nbytes = (size_t)pending;
+
+	lz_crc_update(&lz->crc, chunk, nbytes);
+	if (fwrite(chunk, 1, nbytes, lz->fout) != nbytes)
+		return -1;
+
+	if (lz->pos >= lz->dict_size) {
+		lz->wrapped = true;
+		lz->ppos += lz->pos;
+		lz->pos = 0;
+	} else {
+		lz->wrapped = false;
+	}
+	lz->spos = lz->pos;
+	return 0;
+}
+
+/*
+ * Release everything lz_create() set up: close whichever of the two
+ * streams were opened and free the output window.
+ */
+static void
+lz_destroy(struct lz_decoder *lz)
+{
+	FILE *in = lz->fin;
+	FILE *out = lz->fout;
+
+	if (in != NULL)
+		fclose(in);
+	if (out != NULL)
+		fclose(out);
+	free(lz->obuf);
+}
+
+/*
+ * Initialise `lz' for decoding one member: wrap duplicates of the `fin'
+ * and `fdout' descriptors in stdio streams, allocate a `dict_size'-byte
+ * output window and prime the range decoder from the input.
+ *
+ * The caller's descriptors are never closed here; only the duplicates
+ * are owned by `lz' and released by lz_destroy().
+ *
+ * Returns 0 on success.  On failure returns -1 with every partially
+ * acquired resource released.
+ */
+static int
+lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
+{
+	int fd;
+
+	memset(lz, 0, sizeof(*lz));
+
+	/* A non-positive size would yield an unusable (or huge) window. */
+	if (dict_size <= 0)
+		goto out;
+
+	fd = dup(fin);
+	if (fd == -1)
+		goto out;
+	lz->fin = fdopen(fd, "r");
+	if (lz->fin == NULL) {
+		/* fdopen() failed, so the duplicate is still ours to close. */
+		const int saved_errno = errno;
+
+		close(fd);
+		errno = saved_errno;
+		goto out;
+	}
+
+	fd = dup(fdout);
+	if (fd == -1)
+		goto out;
+	lz->fout = fdopen(fd, "w");
+	if (lz->fout == NULL) {
+		const int saved_errno = errno;
+
+		close(fd);
+		errno = saved_errno;
+		goto out;
+	}
+
+	lz->pos = lz->ppos = lz->spos = 0;
+	lz->crc = ~0;
+	lz->dict_size = dict_size;
+	lz->wrapped = false;
+
+	lz->obuf = malloc(dict_size);
+	if (lz->obuf == NULL)
+		goto out;
+
+	if (lz_rd_create(&lz->rdec, lz->fin) == -1)
+		goto out;
+	return 0;
+out:
+	lz_destroy(lz);
+	return -1;
+}
+
+/*
+ * Return the byte that was written `ahead' + 1 positions before the
+ * current write position.  If that position lies before the start of the
+ * buffer, the byte is taken from the end of the window when the window
+ * has wrapped, and 0 is returned otherwise.
+ */
+static uint8_t
+lz_peek(const struct lz_decoder *lz, unsigned ahead)
+{
+	const off_t idx = lz->pos - ahead - 1;
+
+	if (idx < 0) {
+		if (!lz->wrapped)
+			return 0;
+		return lz->obuf[lz->dict_size + idx];
+	}
+	return lz->obuf[idx];
+}
+
+/*
+ * Append byte `b' to the output window and flush the window as soon as
+ * it is full.  The result of lz_flush() is not examined here.
+ */
+static void
+lz_put(struct lz_decoder *lz, uint8_t b)
+{
+	const off_t at = lz->pos;
+
+	lz->obuf[at] = b;
+	lz->pos = at + 1;
+	if (lz->pos == lz->dict_size)
+		lz_flush(lz);
+}
+
+/*
+ * Total number of bytes produced so far: the bytes accounted for in ppos
+ * plus the current position in the window.
+ */
+static off_t
+lz_get_data_position(const struct lz_decoder *lz)
+{
+	const off_t before_window = lz->ppos;
+
+	return before_window + lz->pos;
+}
+
+/*
+ * Return the running checksum with all 32 bits inverted, the form that
+ * lz_decode() compares with the CRC stored in the member trailer.
+ */
+static unsigned
+lz_get_crc(const struct lz_decoder *lz)
+{
+	const uint32_t final_xor = 0xffffffffU;
+
+	return lz->crc ^ final_xor;
+}
+
+/*
+ * Set the `l' bit models starting at `a' to BIT_MODEL_INIT.
+ */
+static void
+lz_bm_init(int *a, size_t l)
+{
+	int *cur = a;
+	size_t left = l;
+
+	while (left-- > 0)
+		*cur++ = BIT_MODEL_INIT;
+}
+
+#define LZ_BM_INIT(a)	lz_bm_init(a, nitems(a))	/* a: a real 1-D int array (nitems() needs the array type) */
+#define LZ_BM_INIT2(a)	do { /* a: a 2-D int array; every row is initialised */ \
+	size_t l = nitems(a[0]); \
+	for (size_t i = 0; i < nitems(a); i++) \
+		lz_bm_init(a[i], l); \
+} while (/*CONSTCOND*/0)
+
+#define LZ_MODEL_INIT(a) do { /* a: a struct lz_len_model object (not a pointer) */ \
+	a.choice1 = BIT_MODEL_INIT; \
+	a.choice2 = BIT_MODEL_INIT; \
+	LZ_BM_INIT2(a.bm_low); \
+	LZ_BM_INIT2(a.bm_mid); \
+	LZ_BM_INIT(a.bm_high); \
+} while (/*CONSTCOND*/0)
+		
+static bool
+lz_decode_member(struct lz_decoder *lz)
+{	/*
+	 * Decode packets from lz->rdec into the output window until the end
+	 * marker (a match whose distance decodes to 0xFFFFFFFF) is seen.
+	 * Returns true only when that marker arrives with
+	 * len == MIN_MATCH_LEN.  Returns false when a match distance is out
+	 * of range or when the input reaches EOF or an error first.
+	 * lz_flush() is called before every return.
+	 */
+	int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];	/* row chosen by the top bits of the previous byte */
+	int bm_match[LZ_STATES][POS_STATES];
+	int bm_rep[4][LZ_STATES];	/* rows 0..3 are consulted for bits 2, 3, 4 and 5 below */
+	int bm_len[LZ_STATES][POS_STATES];
+	int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];	/* only rows 0..STATES-1 are indexed below */
+	int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
+	int bm_align[DIS_ALIGN_SIZE];
+
+	LZ_BM_INIT2(bm_literal);
+	LZ_BM_INIT2(bm_match);
+	LZ_BM_INIT2(bm_rep);
+	LZ_BM_INIT2(bm_len);
+	LZ_BM_INIT2(bm_dis_slot);
+	LZ_BM_INIT(bm_dis);
+	LZ_BM_INIT(bm_align);
+
+	struct lz_len_model match_len_model;
+	struct lz_len_model rep_len_model;
+
+	LZ_MODEL_INIT(match_len_model);
+	LZ_MODEL_INIT(rep_len_model);
+
+	struct lz_range_decoder *rd = &lz->rdec;
+	unsigned rep[4] = { 0 };	/* the four most recent match distances, rep[0] newest */
+
+
+	int state = 0;
+
+	while (!feof(lz->fin) && !ferror(lz->fin)) {
+		const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
+		// bit 1: 0 means a literal byte follows, 1 a match or repeated match
+		if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
+			const uint8_t prev_byte = lz_peek(lz, 0);
+			const int literal_state =
+			    prev_byte >> (8 - LITERAL_CONTEXT_BITS);
+			int *bm = bm_literal[literal_state];
+			if (lz_st_is_char(state))
+				lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
+			else {
+				int peek = lz_peek(lz, rep[0]);	/* byte at distance rep[0] steers the matched decode */
+				lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
+			}
+			state = lz_st_get_char(state);
+			continue;
+		}
+		int len;
+		// bit 2: non-zero means a repeated match (reuse one of rep[0..3])
+		if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
+			// bit 3: 0 keeps rep[0] as the distance
+			if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
+				// bit 4: 0 means copy a single byte at distance rep[0]
+				if (lz_rd_decode_bit(rd,
+				    &bm_len[state][pos_state]) == 0)
+				{
+					state = lz_st_get_short_rep(state);
+					lz_put(lz, lz_peek(lz, rep[0]));
+					continue;
+				}
+			} else {
+				unsigned distance;
+				// bit 4: 0 selects rep[1]
+				if (lz_rd_decode_bit(rd, &bm_rep[2][state])
+				    == 0)
+					distance = rep[1];
+				else {
+					// bit 5: 0 selects rep[2], 1 selects rep[3]
+					if (lz_rd_decode_bit(rd,
+					    &bm_rep[3][state]) == 0)
+						distance = rep[2];
+					else {
+						distance = rep[3];
+						rep[3] = rep[2];
+					}
+					rep[2] = rep[1];
+				}
+				rep[1] = rep[0];	/* the chosen distance moves to the front */
+				rep[0] = distance;
+			}
+			state = lz_st_get_rep(state);
+			len = MIN_MATCH_LEN +
+			    lz_rd_decode_len(rd, &rep_len_model, pos_state);
+		} else {
+			rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];	/* new match: shift the distance history */
+			len = MIN_MATCH_LEN +
+			    lz_rd_decode_len(rd, &match_len_model, pos_state);
+			const int len_state =
+			    MIN(len - MIN_MATCH_LEN, STATES - 1);
+			rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
+			    DIS_SLOT_BITS);
+			if (rep[0] >= DIS_MODEL_START) {	/* slots below DIS_MODEL_START are the distance itself */
+				const unsigned dis_slot = rep[0];
+				const int direct_bits = (dis_slot >> 1) - 1;
+			        rep[0] = (2 | (dis_slot & 1)) << direct_bits;
+				if (dis_slot < DIS_MODEL_END)
+					rep[0] += lz_rd_decode_tree_reversed(rd,
+					    &bm_dis[rep[0] - dis_slot],
+                                            direct_bits);
+				else {
+					rep[0] += lz_rd_decode(rd, direct_bits
+					    - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
+					rep[0] += lz_rd_decode_tree_reversed(rd,
+					    bm_align, DIS_ALIGN_BITS);
+					if (rep[0] == 0xFFFFFFFFU) {	/* marker: the member ends here */
+						lz_flush(lz);
+						return len == MIN_MATCH_LEN;
+					}
+				}
+			}
+			state = lz_st_get_match(state);
+			if (rep[0] >= lz->dict_size ||
+			    (rep[0] >= lz->pos && !lz->wrapped)) {	/* distance reaches outside the data produced so far */
+				lz_flush(lz);
+				return false;
+			}
+		}
+		for (int i = 0; i < len; i++)	/* copy len bytes from distance rep[0], one byte at a time */
+			lz_put(lz, lz_peek(lz, rep[0]));
+    	}
+	lz_flush(lz);
+	return false;	/* input ended (EOF or error) before the marker */
+}
+
+/*
+ * 0-3	CRC32 of the uncompressed data
+ * 4-11 size of the uncompressed data
+ * 12-19 member size including header and trailer
+ */
+#define TRAILER_SIZE 20
+
+
+/*
+ * Decode one lzip member from descriptor `fin' to descriptor `fdout'
+ * using a `dict_size'-byte window, then verify the member trailer.
+ *
+ * On success returns the uncompressed size recorded in the trailer and,
+ * if `insize' is not NULL, stores the member size recorded in the trailer
+ * there.  Returns -1 when the decoder cannot be set up, the member is
+ * corrupt or truncated, the trailer CRC or data size does not match what
+ * was decoded, or the output could not be written.
+ */
+static off_t
+lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
+{
+	struct lz_decoder lz;
+	uint8_t trailer[TRAILER_SIZE];
+	uint32_t crc = 0;
+	uint64_t data_size = 0;
+	uint64_t member_size = 0;
+	off_t rv = -1;
+
+	if (lz_create(&lz, fin, fdout, dict_size) == -1)
+		return -1;
+
+	if (!lz_decode_member(&lz))
+		goto out;
+
+	for (size_t i = 0; i < nitems(trailer); i++) {
+		const int c = getc(lz.fin);
+
+		if (c == EOF)
+			goto out;	/* truncated trailer */
+		trailer[i] = (uint8_t)c;
+	}
+
+	/*
+	 * All trailer fields are little-endian.  They are assembled in
+	 * unsigned types so that hostile high bytes cannot trigger signed
+	 * shift overflow.
+	 */
+	for (int i = 3; i >= 0; --i)
+		crc = (crc << 8) | trailer[i];
+	for (int i = 11; i >= 4; --i)
+		data_size = (data_size << 8) | trailer[i];
+	for (int i = 19; i >= 12; --i)
+		member_size = (member_size << 8) | trailer[i];
+
+	if (crc != lz_get_crc(&lz) ||
+	    data_size != (uint64_t)lz_get_data_position(&lz))
+		goto out;
+
+	/* The recorded member size must be representable as an off_t. */
+	if (member_size > (uint64_t)INT64_MAX)
+		goto out;
+
+	/*
+	 * lz_put() and lz_decode_member() discard the result of lz_flush(),
+	 * so a failed write is only visible through the stream's error
+	 * indicator.  Push out what stdio still buffers and check it before
+	 * reporting success.
+	 */
+	if (fflush(lz.fout) == EOF || ferror(lz.fout))
+		goto out;
+
+	if (insize)
+		*insize = (off_t)member_size;
+
+	/* ftello(lz.fout) is not used here: it does not work with pipes. */
+	rv = (off_t)data_size;
+out:
+	lz_destroy(&lz);
+	return rv;
+}
+
+
+/*
+ * 0-3 magic
+ * 4 version
+ * 5 coded dict_size
+ */
+#define HDR_SIZE 6
+#define MIN_DICTIONARY_SIZE (1 << 12)
+#define MAX_DICTIONARY_SIZE (1 << 29)
+
+static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
+
+/*
+ * Decode the coded dictionary size byte of an lzip header.
+ *
+ * Bits 4-0 hold the base-2 logarithm of the base size.  Bits 7-5 hold the
+ * number of sixteenths of the base size to subtract from it; for example
+ * 0xD3 is 2^19 - 6 * 2^15 = 320 KiB.
+ *
+ * Returns the dictionary size in bytes, or 0 if it falls outside
+ * MIN_DICTIONARY_SIZE .. MAX_DICTIONARY_SIZE.
+ */
+static unsigned
+lz_get_dict_size(unsigned char c)
+{
+	/* 1U: the exponent can be 31, which would overflow a signed int. */
+	unsigned dict_size = 1U << (c & 0x1f);
+
+	dict_size -= (dict_size / 16) * ((c >> 5) & 0x7);
+	if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
+		return 0;
+	return dict_size;
+}
+
+/*
+ * Decompress one lzip member from descriptor `fin' to descriptor `fout'.
+ *
+ * `pre'/`prelen' describe header bytes the caller has already consumed
+ * from `fin' (at most HDR_SIZE of them); the rest of the header is read
+ * here.  If `bytes_in' is not NULL it receives the member size recorded
+ * in the trailer.
+ *
+ * Returns the uncompressed size, 0 for completely empty input, or -1 on
+ * a read error, a short or invalid header, or a decoding failure.
+ */
+static off_t
+unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
+{
+	/*
+	 * Entry 0 of the CRC table is 0 even after initialisation, so use
+	 * entry 1 (non-zero once built) to tell whether the table exists.
+	 */
+	if (lz_crc[1] == 0)
+		lz_crc_init();
+
+	char header[HDR_SIZE];
+	size_t have = 0;
+
+	if (prelen > sizeof(header))
+		return -1;
+	if (prelen != 0) {
+		if (pre == NULL)
+			return -1;
+		memcpy(header, pre, prelen);
+		have = prelen;
+	}
+
+	/* Pipes may deliver the rest of the header in several pieces. */
+	while (have < sizeof(header)) {
+		const ssize_t nr = read(fin, header + have,
+		    sizeof(header) - have);
+
+		if (nr == -1) {
+			if (errno == EINTR)
+				continue;
+			return -1;
+		}
+		if (nr == 0)	/* EOF: fine only if nothing was seen at all */
+			return have ? -1 : 0;
+		have += (size_t)nr;
+	}
+
+	if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
+		return -1;
+
+	const unsigned dict_size = lz_get_dict_size((unsigned char)header[5]);
+	if (dict_size == 0)
+		return -1;
+
+	return lz_decode(fin, fout, dict_size, bytes_in);
+}

Modified: stable/12/usr.bin/gzip/unxz.c
==============================================================================
--- stable/12/usr.bin/gzip/unxz.c	Mon Jan 21 04:50:56 2019	(r343249)
+++ stable/12/usr.bin/gzip/unxz.c	Mon Jan 21 06:14:26 2019	(r343250)
@@ -1,4 +1,4 @@
-/*	$NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $	*/
+/*	$NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
@@ -156,3 +156,322 @@ unxz(int i, int o, char *pre, size_t prelen, off_t *by
 		}
 	}
 }
+
+#include <stdbool.h>
+
+/*
+ * Copied various bits and pieces from xz support code or brute force
+ * replacements.
+ */
+
+#define	my_min(A,B)	((A)<(B)?(A):(B))
+
+// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
+// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t))
+#if BUFSIZ <= 1024
+#       define IO_BUFFER_SIZE 8192
+#else
+#       define IO_BUFFER_SIZE (BUFSIZ & ~7U)
+#endif
+
+/// is_sparse() accesses the buffer as uint64_t for maximum speed.
+/// Use a union to make sure that the buffer is properly aligned.
+typedef union {
+        uint8_t u8[IO_BUFFER_SIZE];
+        uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
+        uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
+} io_buf;
+
+
+/*
+ * Read exactly `size' bytes from `fd' at absolute offset `pos' into
+ * `buf', using lseek() followed by read().
+ *
+ * Returns false on success and true on failure; a seek error, a read
+ * error and a short read all count as failure.
+ */
+static bool
+io_pread(int fd, io_buf *buf, size_t size, off_t pos)
+{
+	// lseek() + read() is more portable than pread() and serves
+	// equally well here.
+	if (lseek(fd, pos, SEEK_SET) != pos)
+		return true;
+
+	const size_t got = read(fd, buf, size);
+
+	// read() returning -1 shows up as SIZE_MAX after conversion.
+	return got == SIZE_MAX || got != size;
+}
+
+/*
+ * Most of the following is copied (mostly verbatim) from the xz
+ * distribution, from file src/xz/list.c
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       list.c
+/// \brief      Listing information about .xz files
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+/// Information about a .xz file
+typedef struct {
+	/// Combined Index of all Streams in the file
+	lzma_index *idx;
+
+	/// Total amount of Stream Padding
+	uint64_t stream_padding;
+
+	/// Highest memory usage so far
+	uint64_t memusage_max;
+
+	/// True if all Blocks so far have Compressed Size and
+	/// Uncompressed Size fields
+	bool all_have_sizes;
+
+	/// Oldest XZ Utils version that will decompress the file
+	uint32_t min_version;
+
+} xz_file_info;
+
+#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }	/* in field order: idx, stream_padding, memusage_max, all_have_sizes, min_version */
+
+
+/// \brief      Parse the Index(es) from the given .xz file
+///
+/// \param      xfi     Pointer to structure where the decoded information
+///                     is stored.
+/// \param      pair    Input file
+///
+/// \return     On success, false is returned. On error, true is returned.
+///
+// TODO: This function is pretty big. liblzma should have a function that
+// takes a callback function to parse the Index(es) from a .xz file to make
+// it easy for applications.
+static bool
+parse_indexes(xz_file_info *xfi, int src_fd)
+{
+	struct stat st;
+
+	fstat(src_fd, &st);
+	if (st.st_size <= 0) {
+		return true;
+	}
+
+	if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
+		return true;
+	}
+
+	io_buf buf;
+	lzma_stream_flags header_flags;
+	lzma_stream_flags footer_flags;
+	lzma_ret ret;
+
+	// lzma_stream for the Index decoder
+	lzma_stream strm = LZMA_STREAM_INIT;
+
+	// All Indexes decoded so far
+	lzma_index *combined_index = NULL;
+
+	// The Index currently being decoded
+	lzma_index *this_index = NULL;
+
+	// Current position in the file. We parse the file backwards so
+	// initialize it to point to the end of the file.
+	off_t pos = st.st_size;
+
+	// Each loop iteration decodes one Index.
+	do {
+		// Check that there is enough data left to contain at least
+		// the Stream Header and Stream Footer. This check cannot
+		// fail in the first pass of this loop.
+		if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
+			goto error;
+		}
+
+		pos -= LZMA_STREAM_HEADER_SIZE;
+		lzma_vli stream_padding = 0;
+
+		// Locate the Stream Footer. There may be Stream Padding which
+		// we must skip when reading backwards.
+		while (true) {
+			if (pos < LZMA_STREAM_HEADER_SIZE) {
+				goto error;
+			}
+
+			if (io_pread(src_fd, &buf,
+					LZMA_STREAM_HEADER_SIZE, pos))
+				goto error;
+
+			// Stream Padding is always a multiple of four bytes.
+			int i = 2;
+			if (buf.u32[i] != 0)
+				break;
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list