svn commit: r219354 - in head/sbin: hastctl hastd
Pawel Jakub Dawidek
pjd at FreeBSD.org
Sun Mar 6 23:09:34 UTC 2011
Author: pjd
Date: Sun Mar 6 23:09:33 2011
New Revision: 219354
URL: http://svn.freebsd.org/changeset/base/219354
Log:
Allow to compress on-the-wire data using two algorithms:
- HOLE - it simply turns all-zero blocks into few bytes header;
it is extremely fast, so it is turned on by default;
it is mostly intended to speed up initial synchronization
where we expect many zeros;
- LZF - very fast algorithm by Marc Alexander Lehmann, which shows
very decent compression ratio and has BSD license.
MFC after: 2 weeks
Added:
head/sbin/hastd/hast_compression.c (contents, props changed)
head/sbin/hastd/hast_compression.h (contents, props changed)
head/sbin/hastd/lzf.c (contents, props changed)
head/sbin/hastd/lzf.h (contents, props changed)
Modified:
head/sbin/hastctl/Makefile
head/sbin/hastd/Makefile
head/sbin/hastd/control.c
head/sbin/hastd/hast.conf.5
head/sbin/hastd/hast.h
head/sbin/hastd/hast_proto.c
head/sbin/hastd/hastd.c
head/sbin/hastd/parse.y
head/sbin/hastd/primary.c
head/sbin/hastd/token.l
Modified: head/sbin/hastctl/Makefile
==============================================================================
--- head/sbin/hastctl/Makefile Sun Mar 6 23:01:02 2011 (r219353)
+++ head/sbin/hastctl/Makefile Sun Mar 6 23:09:33 2011 (r219354)
@@ -8,7 +8,8 @@ PROG= hastctl
SRCS= activemap.c
SRCS+= crc32.c
SRCS+= ebuf.c
-SRCS+= hast_checksum.c hast_proto.c hastctl.c
+SRCS+= hast_checksum.c hast_compression.c hast_proto.c hastctl.c
+SRCS+= lzf.c
SRCS+= metadata.c
SRCS+= nv.c
SRCS+= parse.y pjdlog.c
Modified: head/sbin/hastd/Makefile
==============================================================================
--- head/sbin/hastd/Makefile Sun Mar 6 23:01:02 2011 (r219353)
+++ head/sbin/hastd/Makefile Sun Mar 6 23:09:33 2011 (r219354)
@@ -6,7 +6,8 @@ PROG= hastd
SRCS= activemap.c
SRCS+= control.c crc32.c
SRCS+= ebuf.c event.c
-SRCS+= hast_checksum.c hast_proto.c hastd.c hooks.c
+SRCS+= hast_checksum.c hast_compression.c hast_proto.c hastd.c hooks.c
+SRCS+= lzf.c
SRCS+= metadata.c
SRCS+= nv.c
SRCS+= secondary.c
Modified: head/sbin/hastd/control.c
==============================================================================
--- head/sbin/hastd/control.c Sun Mar 6 23:01:02 2011 (r219353)
+++ head/sbin/hastd/control.c Sun Mar 6 23:09:33 2011 (r219354)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include "hast.h"
#include "hastd.h"
#include "hast_checksum.h"
+#include "hast_compression.h"
#include "hast_proto.h"
#include "hooks.h"
#include "nv.h"
@@ -249,6 +250,8 @@ control_status(struct hastd_config *cfg,
}
nv_add_string(nvout, checksum_name(res->hr_checksum),
"checksum%u", no);
+ nv_add_string(nvout, compression_name(res->hr_compression),
+ "compression%u", no);
nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
switch (res->hr_role) {
Modified: head/sbin/hastd/hast.conf.5
==============================================================================
--- head/sbin/hastd/hast.conf.5 Sun Mar 6 23:01:02 2011 (r219353)
+++ head/sbin/hastd/hast.conf.5 Sun Mar 6 23:09:33 2011 (r219354)
@@ -60,6 +60,7 @@ control <addr>
listen <addr>
replication <mode>
checksum <algorithm>
+compression <algorithm>
timeout <seconds>
exec <path>
@@ -79,6 +80,7 @@ resource <name> {
# Resource section
replication <mode>
checksum <algorithm>
+ compression <algorithm>
name <name>
local <path>
timeout <seconds>
@@ -215,6 +217,24 @@ CRC32 checksum will be calculated.
.It Ic sha256
SHA256 checksum will be calculated.
.El
+.It Ic compression Aq algorithm
+.Pp
+Compression algorithm should be one of the following:
+.Bl -tag -width ".Ic none"
+.It Ic none
+Data send over the network will not be compressed.
+.It Ic hole
+Only blocks that contain all zeros will be compressed.
+This is very useful for initial synchronization where potentially many blocks
+are still all zeros.
+There should be no measurable performance overhead when this algorithm is being
+used.
+This is the default setting.
+.It Ic lzf
+The LZF algorithm by Marc Alexander Lehmann will be used to compress the data
+send over the network.
+LZF is very fast, general purpose compression algorithm.
+.El
.It Ic timeout Aq seconds
.Pp
Connection timeout in seconds.
Modified: head/sbin/hastd/hast.h
==============================================================================
--- head/sbin/hastd/hast.h Sun Mar 6 23:01:02 2011 (r219353)
+++ head/sbin/hastd/hast.h Sun Mar 6 23:09:33 2011 (r219354)
@@ -117,6 +117,10 @@ struct hastd_config {
#define HAST_REPLICATION_MEMSYNC 1
#define HAST_REPLICATION_ASYNC 2
+#define HAST_COMPRESSION_NONE 0
+#define HAST_COMPRESSION_HOLE 1
+#define HAST_COMPRESSION_LZF 2
+
#define HAST_CHECKSUM_NONE 0
#define HAST_CHECKSUM_CRC32 1
#define HAST_CHECKSUM_SHA256 2
@@ -137,6 +141,8 @@ struct hast_resource {
int hr_keepdirty;
/* Path to a program to execute on various events. */
char hr_exec[PATH_MAX];
+ /* Compression algorithm. */
+ int hr_compression;
/* Checksum algorithm. */
int hr_checksum;
Added: head/sbin/hastd/hast_compression.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sbin/hastd/hast_compression.c Sun Mar 6 23:09:33 2011 (r219354)
@@ -0,0 +1,283 @@
+/*-
+ * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/endian.h>
+
+#include <errno.h>
+#include <string.h>
+#include <strings.h>
+
+#include <hast.h>
+#include <lzf.h>
+#include <nv.h>
+#include <pjdlog.h>
+
+#include "hast_compression.h"
+
+static bool
+allzeros(const void *data, size_t size)
+{
+ const uint64_t *p = data;
+ unsigned int i;
+ uint64_t v;
+
+ PJDLOG_ASSERT((size % sizeof(*p)) == 0);
+
+ /*
+ * This is the fastest method I found for checking if the given
+ * buffer contain all zeros.
+ * Because inside the loop we don't check at every step, we would
+ * get an answer only after walking through entire buffer.
+ * To return early if the buffer doesn't contain all zeros, we probe
+ * 8 bytes at the begining, in the middle and at the end of the buffer
+ * first.
+ */
+
+ size >>= 3; /* divide by 8 */
+ if ((p[0] | p[size >> 1] | p[size - 1]) != 0)
+ return (false);
+ v = 0;
+ for (i = 0; i < size; i++)
+ v |= *p++;
+ return (v == 0);
+}
+
+static void *
+hast_hole_compress(const unsigned char *data, size_t *sizep)
+{
+ uint32_t size;
+ void *newbuf;
+
+ if (!allzeros(data, *sizep))
+ return (NULL);
+
+ newbuf = malloc(sizeof(size));
+ if (newbuf == NULL) {
+ pjdlog_warning("Unable to compress (no memory: %zu).",
+ (size_t)*sizep);
+ return (NULL);
+ }
+ size = htole32((uint32_t)*sizep);
+ bcopy(&size, newbuf, sizeof(size));
+ *sizep = sizeof(size);
+
+ return (newbuf);
+}
+
+static void *
+hast_hole_decompress(const unsigned char *data, size_t *sizep)
+{
+ uint32_t size;
+ void *newbuf;
+
+ if (*sizep != sizeof(size)) {
+ pjdlog_error("Unable to decompress (invalid size: %zu).",
+ *sizep);
+ return (NULL);
+ }
+
+ bcopy(data, &size, sizeof(size));
+ size = le32toh(size);
+
+ newbuf = malloc(size);
+ if (newbuf == NULL) {
+ pjdlog_error("Unable to decompress (no memory: %zu).",
+ (size_t)size);
+ return (NULL);
+ }
+ bzero(newbuf, size);
+ *sizep = size;
+
+ return (newbuf);
+}
+
+/* Minimum block size to try to compress. */
+#define HAST_LZF_COMPRESS_MIN 1024
+
+static void *
+hast_lzf_compress(const unsigned char *data, size_t *sizep)
+{
+ unsigned char *newbuf;
+ uint32_t origsize;
+ size_t newsize;
+
+ origsize = *sizep;
+
+ if (origsize <= HAST_LZF_COMPRESS_MIN)
+ return (NULL);
+
+ newsize = sizeof(origsize) + origsize - HAST_LZF_COMPRESS_MIN;
+ newbuf = malloc(newsize);
+ if (newbuf == NULL) {
+ pjdlog_warning("Unable to compress (no memory: %zu).",
+ newsize);
+ return (NULL);
+ }
+ newsize = lzf_compress(data, *sizep, newbuf + sizeof(origsize),
+ newsize - sizeof(origsize));
+ if (newsize == 0) {
+ free(newbuf);
+ return (NULL);
+ }
+ origsize = htole32(origsize);
+ bcopy(&origsize, newbuf, sizeof(origsize));
+
+ *sizep = sizeof(origsize) + newsize;
+ return (newbuf);
+}
+
+static void *
+hast_lzf_decompress(const unsigned char *data, size_t *sizep)
+{
+ unsigned char *newbuf;
+ uint32_t origsize;
+ size_t newsize;
+
+ PJDLOG_ASSERT(*sizep > sizeof(origsize));
+
+ bcopy(data, &origsize, sizeof(origsize));
+ origsize = le32toh(origsize);
+ PJDLOG_ASSERT(origsize > HAST_LZF_COMPRESS_MIN);
+
+ newbuf = malloc(origsize);
+ if (newbuf == NULL) {
+ pjdlog_error("Unable to decompress (no memory: %zu).",
+ (size_t)origsize);
+ return (NULL);
+ }
+ newsize = lzf_decompress(data + sizeof(origsize),
+ *sizep - sizeof(origsize), newbuf, origsize);
+ if (newsize == 0) {
+ free(newbuf);
+ pjdlog_error("Unable to decompress.");
+ return (NULL);
+ }
+ PJDLOG_ASSERT(newsize == origsize);
+
+ *sizep = newsize;
+ return (newbuf);
+}
+
+const char *
+compression_name(int num)
+{
+
+ switch (num) {
+ case HAST_COMPRESSION_NONE:
+ return ("none");
+ case HAST_COMPRESSION_HOLE:
+ return ("hole");
+ case HAST_COMPRESSION_LZF:
+ return ("lzf");
+ }
+ return ("unknown");
+}
+
+int
+compression_send(const struct hast_resource *res, struct nv *nv, void **datap,
+ size_t *sizep, bool *freedatap)
+{
+ unsigned char *newbuf;
+ int compression;
+ size_t size;
+
+ size = *sizep;
+ compression = res->hr_compression;
+
+ switch (compression) {
+ case HAST_COMPRESSION_NONE:
+ return (0);
+ case HAST_COMPRESSION_HOLE:
+ newbuf = hast_hole_compress(*datap, &size);
+ break;
+ case HAST_COMPRESSION_LZF:
+ /* Try 'hole' compression first. */
+ newbuf = hast_hole_compress(*datap, &size);
+ if (newbuf != NULL)
+ compression = HAST_COMPRESSION_HOLE;
+ else
+ newbuf = hast_lzf_compress(*datap, &size);
+ break;
+ default:
+ PJDLOG_ABORT("Invalid compression: %d.", res->hr_compression);
+ }
+
+ if (newbuf == NULL) {
+ /* Unable to compress the data. */
+ return (0);
+ }
+ nv_add_string(nv, compression_name(compression), "compression");
+ if (nv_error(nv) != 0) {
+ free(newbuf);
+ errno = nv_error(nv);
+ return (-1);
+ }
+ if (*freedatap)
+ free(*datap);
+ *freedatap = true;
+ *datap = newbuf;
+ *sizep = size;
+
+ return (0);
+}
+
+int
+compression_recv(const struct hast_resource *res __unused, struct nv *nv,
+ void **datap, size_t *sizep, bool *freedatap)
+{
+ unsigned char *newbuf;
+ const char *algo;
+ size_t size;
+
+ algo = nv_get_string(nv, "compression");
+ if (algo == NULL)
+ return (0); /* No compression. */
+
+ newbuf = NULL;
+ size = *sizep;
+
+ if (strcmp(algo, "hole") == 0)
+ newbuf = hast_hole_decompress(*datap, &size);
+ else if (strcmp(algo, "lzf") == 0)
+ newbuf = hast_lzf_decompress(*datap, &size);
+ else {
+ pjdlog_error("Unknown compression algorithm '%s'.", algo);
+ return (-1); /* Unknown compression algorithm. */
+ }
+
+ if (newbuf == NULL)
+ return (-1);
+ if (*freedatap)
+ free(*datap);
+ *freedatap = true;
+ *datap = newbuf;
+ *sizep = size;
+
+ return (0);
+}
Added: head/sbin/hastd/hast_compression.h
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sbin/hastd/hast_compression.h Sun Mar 6 23:09:33 2011 (r219354)
@@ -0,0 +1,44 @@
+/*-
+ * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _HAST_COMPRESSION_H_
+#define _HAST_COMPRESSION_H_
+
+#include <stdlib.h> /* size_t */
+
+#include <hast.h>
+#include <nv.h>
+
+const char *compression_name(int num);
+
+int compression_send(const struct hast_resource *res, struct nv *nv,
+ void **datap, size_t *sizep, bool *freedatap);
+int compression_recv(const struct hast_resource *res, struct nv *nv,
+ void **datap, size_t *sizep, bool *freedatap);
+
+#endif /* !_HAST_COMPRESSION_H_ */
Modified: head/sbin/hastd/hast_proto.c
==============================================================================
--- head/sbin/hastd/hast_proto.c Sun Mar 6 23:01:02 2011 (r219353)
+++ head/sbin/hastd/hast_proto.c Sun Mar 6 23:09:33 2011 (r219354)
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#ifdef HAVE_CRYPTO
#include "hast_checksum.h"
#endif
+#include "hast_compression.h"
#include "hast_proto.h"
struct hast_main_header {
@@ -67,6 +68,7 @@ struct hast_pipe_stage {
};
static struct hast_pipe_stage pipeline[] = {
+ { "compression", compression_send, compression_recv },
{ "checksum", checksum_send, checksum_recv }
};
Modified: head/sbin/hastd/hastd.c
==============================================================================
--- head/sbin/hastd/hastd.c Sun Mar 6 23:01:02 2011 (r219353)
+++ head/sbin/hastd/hastd.c Sun Mar 6 23:09:33 2011 (r219354)
@@ -363,6 +363,8 @@ resource_needs_restart(const struct hast
return (true);
if (res0->hr_checksum != res1->hr_checksum)
return (true);
+ if (res0->hr_compression != res1->hr_compression)
+ return (true);
if (res0->hr_timeout != res1->hr_timeout)
return (true);
if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
@@ -389,6 +391,8 @@ resource_needs_reload(const struct hast_
return (true);
if (res0->hr_checksum != res1->hr_checksum)
return (true);
+ if (res0->hr_compression != res1->hr_compression)
+ return (true);
if (res0->hr_timeout != res1->hr_timeout)
return (true);
if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
@@ -409,6 +413,7 @@ resource_reload(const struct hast_resour
nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr");
nv_add_int32(nvout, (int32_t)res->hr_replication, "replication");
nv_add_int32(nvout, (int32_t)res->hr_checksum, "checksum");
+ nv_add_int32(nvout, (int32_t)res->hr_compression, "compression");
nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout");
nv_add_string(nvout, res->hr_exec, "exec");
if (nv_error(nvout) != 0) {
@@ -568,6 +573,7 @@ hastd_reload(void)
sizeof(cres->hr_remoteaddr));
cres->hr_replication = nres->hr_replication;
cres->hr_checksum = nres->hr_checksum;
+ cres->hr_compression = nres->hr_compression;
cres->hr_timeout = nres->hr_timeout;
strlcpy(cres->hr_exec, nres->hr_exec,
sizeof(cres->hr_exec));
Added: head/sbin/hastd/lzf.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sbin/hastd/lzf.c Sun Mar 6 23:09:33 2011 (r219354)
@@ -0,0 +1,406 @@
+/*
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp at schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include "lzf.h"
+
+#define HSIZE (1 << (HLOG))
+
+/*
+ * don't play with this unless you benchmark!
+ * decompression is not dependent on the hash function
+ * the hashing function might seem strange, just believe me
+ * it works ;)
+ */
+#ifndef FRST
+# define FRST(p) (((p[0]) << 8) | p[1])
+# define NEXT(v,p) (((v) << 8) | p[2])
+# if ULTRA_FAST
+# define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1))
+# elif VERY_FAST
+# define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# else
+# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# endif
+#endif
+/*
+ * IDX works because it is very similar to a multiplicative hash, e.g.
+ * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))
+ * the latter is also quite fast on newer CPUs, and compresses similarly.
+ *
+ * the next one is also quite good, albeit slow ;)
+ * (int)(cos(h & 0xffffff) * 1e6)
+ */
+
+#if 0
+/* original lzv-like hash function, much worse and thus slower */
+# define FRST(p) (p[0] << 5) ^ p[1]
+# define NEXT(v,p) ((v) << 5) ^ p[2]
+# define IDX(h) ((h) & (HSIZE - 1))
+#endif
+
+#define MAX_LIT (1 << 5)
+#define MAX_OFF (1 << 13)
+#define MAX_REF ((1 << 8) + (1 << 3))
+
+#if __GNUC__ >= 3
+# define expect(expr,value) __builtin_expect ((expr),(value))
+# define inline inline
+#else
+# define expect(expr,value) (expr)
+# define inline static
+#endif
+
+#define expect_false(expr) expect ((expr) != 0, 0)
+#define expect_true(expr) expect ((expr) != 0, 1)
+
+/*
+ * compressed format
+ *
+ * 000LLLLL <L+1> ; literal
+ * LLLooooo oooooooo ; backref L
+ * 111ooooo LLLLLLLL oooooooo ; backref L+7
+ *
+ */
+
+unsigned int
+lzf_compress (const void *const in_data, unsigned int in_len,
+ void *out_data, unsigned int out_len
+#if LZF_STATE_ARG
+ , LZF_STATE htab
+#endif
+ )
+{
+#if !LZF_STATE_ARG
+ LZF_STATE htab;
+#endif
+ const u8 **hslot;
+ const u8 *ip = (const u8 *)in_data;
+ u8 *op = (u8 *)out_data;
+ const u8 *in_end = ip + in_len;
+ u8 *out_end = op + out_len;
+ const u8 *ref;
+
+ /* off requires a type wide enough to hold a general pointer difference.
+ * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
+ * works for differences within a single object). We also assume that no
+ * no bit pattern traps. Since the only platform that is both non-POSIX
+ * and fails to support both assumptions is windows 64 bit, we make a
+ * special workaround for it.
+ */
+#if defined (WIN32) && defined (_M_X64)
+ unsigned _int64 off; /* workaround for missing POSIX compliance */
+#else
+ unsigned long off;
+#endif
+ unsigned int hval;
+ int lit;
+
+ if (!in_len || !out_len)
+ return 0;
+
+#if INIT_HTAB
+ memset (htab, 0, sizeof (htab));
+# if 0
+ for (hslot = htab; hslot < htab + HSIZE; hslot++)
+ *hslot++ = ip;
+# endif
+#endif
+
+ lit = 0; op++; /* start run */
+
+ hval = FRST (ip);
+ while (ip < in_end - 2)
+ {
+ hval = NEXT (hval, ip);
+ hslot = htab + IDX (hval);
+ ref = *hslot; *hslot = ip;
+
+ if (1
+#if INIT_HTAB
+ && ref < ip /* the next test will actually take care of this, but this is faster */
+#endif
+ && (off = ip - ref - 1) < MAX_OFF
+ && ip + 4 < in_end
+ && ref > (const u8 *)in_data
+#if STRICT_ALIGN
+ && ref[0] == ip[0]
+ && ref[1] == ip[1]
+ && ref[2] == ip[2]
+#else
+ && *(const u16 *)ref == *(const u16 *)ip
+ && ref[2] == ip[2]
+#endif
+ )
+ {
+ /* match found at *ref++ */
+ unsigned int len = 2;
+ unsigned int maxlen = in_end - ip - len;
+ maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
+
+ if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
+ if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
+ return 0;
+
+ op [- lit - 1] = lit - 1; /* stop run */
+ op -= !lit; /* undo run if length is zero */
+
+ for (;;)
+ {
+ if (expect_true (maxlen > 16))
+ {
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ }
+
+ do
+ len++;
+ while (len < maxlen && ref[len] == ip[len]);
+
+ break;
+ }
+
+ len -= 2; /* len is now #octets - 1 */
+ ip++;
+
+ if (len < 7)
+ {
+ *op++ = (off >> 8) + (len << 5);
+ }
+ else
+ {
+ *op++ = (off >> 8) + ( 7 << 5);
+ *op++ = len - 7;
+ }
+
+ *op++ = off;
+ lit = 0; op++; /* start run */
+
+ ip += len + 1;
+
+ if (expect_false (ip >= in_end - 2))
+ break;
+
+#if ULTRA_FAST || VERY_FAST
+ --ip;
+# if VERY_FAST && !ULTRA_FAST
+ --ip;
+# endif
+ hval = FRST (ip);
+
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip;
+ ip++;
+
+# if VERY_FAST && !ULTRA_FAST
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip;
+ ip++;
+# endif
+#else
+ ip -= len + 1;
+
+ do
+ {
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip;
+ ip++;
+ }
+ while (len--);
+#endif
+ }
+ else
+ {
+ /* one more literal byte we must copy */
+ if (expect_false (op >= out_end))
+ return 0;
+
+ lit++; *op++ = *ip++;
+
+ if (expect_false (lit == MAX_LIT))
+ {
+ op [- lit - 1] = lit - 1; /* stop run */
+ lit = 0; op++; /* start run */
+ }
+ }
+ }
+
+ if (op + 3 > out_end) /* at most 3 bytes can be missing here */
+ return 0;
+
+ while (ip < in_end)
+ {
+ lit++; *op++ = *ip++;
+
+ if (expect_false (lit == MAX_LIT))
+ {
+ op [- lit - 1] = lit - 1; /* stop run */
+ lit = 0; op++; /* start run */
+ }
+ }
+
+ op [- lit - 1] = lit - 1; /* end run */
+ op -= !lit; /* undo run if length is zero */
+
+ return op - (u8 *)out_data;
+}
+
+#if AVOID_ERRNO
+# define SET_ERRNO(n)
+#else
+# include <errno.h>
+# define SET_ERRNO(n) errno = (n)
+#endif
+
+#if (__i386 || __amd64) && __GNUC__ >= 3
+# define lzf_movsb(dst, src, len) \
+ asm ("rep movsb" \
+ : "=D" (dst), "=S" (src), "=c" (len) \
+ : "0" (dst), "1" (src), "2" (len));
+#endif
+
+unsigned int
+lzf_decompress (const void *const in_data, unsigned int in_len,
+ void *out_data, unsigned int out_len)
+{
+ u8 const *ip = (const u8 *)in_data;
+ u8 *op = (u8 *)out_data;
+ u8 const *const in_end = ip + in_len;
+ u8 *const out_end = op + out_len;
+
+ do
+ {
+ unsigned int ctrl = *ip++;
+
+ if (ctrl < (1 << 5)) /* literal run */
+ {
+ ctrl++;
+
+ if (op + ctrl > out_end)
+ {
+ SET_ERRNO (E2BIG);
+ return 0;
+ }
+
+#if CHECK_INPUT
+ if (ip + ctrl > in_end)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+#endif
+
+#ifdef lzf_movsb
+ lzf_movsb (op, ip, ctrl);
+#else
+ do
+ *op++ = *ip++;
+ while (--ctrl);
+#endif
+ }
+ else /* back reference */
+ {
+ unsigned int len = ctrl >> 5;
+
+ u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;
+
+#if CHECK_INPUT
+ if (ip >= in_end)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+#endif
+ if (len == 7)
+ {
+ len += *ip++;
+#if CHECK_INPUT
+ if (ip >= in_end)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+#endif
+ }
+
+ ref -= *ip++;
+
+ if (op + len + 2 > out_end)
+ {
+ SET_ERRNO (E2BIG);
+ return 0;
+ }
+
+ if (ref < (u8 *)out_data)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+
+#ifdef lzf_movsb
+ len += 2;
+ lzf_movsb (op, ref, len);
+#else
+ *op++ = *ref++;
+ *op++ = *ref++;
+
+ do
+ *op++ = *ref++;
+ while (--len);
+#endif
+ }
+ }
+ while (ip < in_end);
+
+ return op - (u8 *)out_data;
+}
+
Added: head/sbin/hastd/lzf.h
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sbin/hastd/lzf.h Sun Mar 6 23:09:33 2011 (r219354)
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp at schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifndef LZF_H
+#define LZF_H
+
+/***********************************************************************
+**
+** lzf -- an extremely fast/free compression/decompression-method
+** http://liblzf.plan9.de/
+**
+** This algorithm is believed to be patent-free.
+**
+***********************************************************************/
+
+#define LZF_VERSION 0x0105 /* 1.5, API version */
+
+/*
+ * Compress in_len bytes stored at the memory block starting at
+ * in_data and write the result to out_data, up to a maximum length
+ * of out_len bytes.
+ *
+ * If the output buffer is not large enough or any error occurs return 0,
+ * otherwise return the number of bytes used, which might be considerably
+ * more than in_len (but less than 104% of the original size), so it
+ * makes sense to always use out_len == in_len - 1), to ensure _some_
+ * compression, and store the data uncompressed otherwise (with a flag, of
+ * course.
+ *
+ * lzf_compress might use different algorithms on different systems and
+ * even different runs, thus might result in different compressed strings
+ * depending on the phase of the moon or similar factors. However, all
+ * these strings are architecture-independent and will result in the
+ * original data when decompressed using lzf_decompress.
+ *
+ * The buffers must not be overlapping.
+ *
+ * If the option LZF_STATE_ARG is enabled, an extra argument must be
+ * supplied which is not reflected in this header file. Refer to lzfP.h
+ * and lzf_c.c.
+ *
+ */
+unsigned int
+lzf_compress (const void *const in_data, unsigned int in_len,
+ void *out_data, unsigned int out_len);
+
+/*
+ * Decompress data compressed with some version of the lzf_compress
+ * function and stored at location in_data and length in_len. The result
+ * will be stored at out_data up to a maximum of out_len characters.
+ *
+ * If the output buffer is not large enough to hold the decompressed
+ * data, a 0 is returned and errno is set to E2BIG. Otherwise the number
+ * of decompressed bytes (i.e. the original length of the data) is
+ * returned.
+ *
+ * If an error in the compressed data is detected, a zero is returned and
+ * errno is set to EINVAL.
+ *
+ * This function is very fast, about as fast as a copying loop.
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list