svn commit: r309846 - in stable/11: contrib/libdivsufsort usr.bin/bsdiff/bsdiff

Xin LI delphij at FreeBSD.org
Sun Dec 11 06:08:03 UTC 2016


Author: delphij
Date: Sun Dec 11 06:08:01 2016
New Revision: 309846
URL: https://svnweb.freebsd.org/changeset/base/309846

Log:
  MFC r303285:
  
  Change bsdiff to use divsufsort suffix sort library instead of qsufsort,
  which is more efficient.
  
  Note that for now we do not create a separate library for libdivsufsort
  because it's not used anywhere else.
  
  Obtained from:        Chromium

Added:
  stable/11/contrib/libdivsufsort/
     - copied from r303285, head/contrib/libdivsufsort/
  stable/11/usr.bin/bsdiff/bsdiff/config.h
     - copied unchanged from r303285, head/usr.bin/bsdiff/bsdiff/config.h
  stable/11/usr.bin/bsdiff/bsdiff/divsufsort64.h
     - copied unchanged from r303285, head/usr.bin/bsdiff/bsdiff/divsufsort64.h
Modified:
  stable/11/usr.bin/bsdiff/bsdiff/Makefile
  stable/11/usr.bin/bsdiff/bsdiff/bsdiff.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/usr.bin/bsdiff/bsdiff/Makefile
==============================================================================
--- stable/11/usr.bin/bsdiff/bsdiff/Makefile	Sun Dec 11 04:02:38 2016	(r309845)
+++ stable/11/usr.bin/bsdiff/bsdiff/Makefile	Sun Dec 11 06:08:01 2016	(r309846)
@@ -1,6 +1,17 @@
 # $FreeBSD$
 
-PROG=	bsdiff
+PROG=		bsdiff
+
+# libdivsufsort configured with:
+# cmake -DCMAKE_BUILD_TYPE="Release" -DBUILD_DIVSUFSORT64=ON
+.PATH:		${.CURDIR}/../../../contrib/libdivsufsort/lib
+CFLAGS+=	-DHAVE_CONFIG_H=1 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
+CFLAGS+=	-D_LARGE_FILES -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
+CFLAGS+=	-D__STDC_LIMIT_MACROS -DBUILD_DIVSUFSORT64
+CFLAGS+=	-I${.CURDIR}/../../../contrib/libdivsufsort/include -I${.CURDIR}
+SRCS=		divsufsort.c sssort.c trsort.c utils.c
+
+SRCS+=		bsdiff.c
 
 LIBADD=	bz2
 

Modified: stable/11/usr.bin/bsdiff/bsdiff/bsdiff.c
==============================================================================
--- stable/11/usr.bin/bsdiff/bsdiff/bsdiff.c	Sun Dec 11 04:02:38 2016	(r309845)
+++ stable/11/usr.bin/bsdiff/bsdiff/bsdiff.c	Sun Dec 11 06:08:01 2016	(r309846)
@@ -44,106 +44,11 @@ __FBSDID("$FreeBSD$");
 #define O_BINARY 0
 #endif
 
-#define MIN(x,y) (((x)<(y)) ? (x) : (y))
-
-static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h)
-{
-	off_t i,j,k,x,tmp,jj,kk;
-
-	if(len<16) {
-		for(k=start;k<start+len;k+=j) {
-			j=1;x=V[I[k]+h];
-			for(i=1;k+i<start+len;i++) {
-				if(V[I[k+i]+h]<x) {
-					x=V[I[k+i]+h];
-					j=0;
-				}
-				if(V[I[k+i]+h]==x) {
-					tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
-					j++;
-				}
-			}
-			for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
-			if(j==1) I[k]=-1;
-		}
-		return;
-	}
-
-	x=V[I[start+len/2]+h];
-	jj=0;kk=0;
-	for(i=start;i<start+len;i++) {
-		if(V[I[i]+h]<x) jj++;
-		if(V[I[i]+h]==x) kk++;
-	}
-	jj+=start;kk+=jj;
-
-	i=start;j=0;k=0;
-	while(i<jj) {
-		if(V[I[i]+h]<x) {
-			i++;
-		} else if(V[I[i]+h]==x) {
-			tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
-			j++;
-		} else {
-			tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
-			k++;
-		}
-	}
-
-	while(jj+j<kk) {
-		if(V[I[jj+j]+h]==x) {
-			j++;
-		} else {
-			tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
-			k++;
-		}
-	}
-
-	if(jj>start) split(I,V,start,jj-start,h);
-
-	for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
-	if(jj==kk-1) I[jj]=-1;
-
-	if(start+len>kk) split(I,V,kk,start+len-kk,h);
-}
-
-static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize)
-{
-	off_t buckets[256];
-	off_t i,h,len;
-
-	for(i=0;i<256;i++) buckets[i]=0;
-	for(i=0;i<oldsize;i++) buckets[old[i]]++;
-	for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
-	for(i=255;i>0;i--) buckets[i]=buckets[i-1];
-	buckets[0]=0;
-
-	for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
-	I[0]=oldsize;
-	for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
-	V[oldsize]=0;
-	for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
-	I[0]=-1;
-
-	for(h=1;I[0]!=-(oldsize+1);h+=h) {
-		len=0;
-		for(i=0;i<oldsize+1;) {
-			if(I[i]<0) {
-				len-=I[i];
-				i-=I[i];
-			} else {
-				if(len) I[i-len]=-len;
-				len=V[I[i]]+1-i;
-				split(I,V,i,len,h);
-				i+=len;
-				len=0;
-			}
-		}
-		if(len) I[i-len]=-len;
-	}
+#include "divsufsort64.h"
+#define saidx_t saidx64_t
+#define divsufsort divsufsort64
 
-	for(i=0;i<oldsize+1;i++) I[V[i]]=i;
-}
+#define MIN(x,y) (((x)<(y)) ? (x) : (y))
 
 static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
 {
@@ -212,7 +117,7 @@ int main(int argc,char *argv[])
 	int fd;
 	u_char *old,*new;
 	off_t oldsize,newsize;
-	off_t *I,*V;
+	saidx_t *I;
 	off_t scan,pos,len;
 	off_t lastscan,lastpos,lastoffset;
 	off_t oldscore,scsc;
@@ -248,12 +153,9 @@ int main(int argc,char *argv[])
 		(read(fd,old,oldsize)!=oldsize) ||
 		(close(fd)==-1)) err(1,"%s",argv[1]);
 
-	if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) ||
-		((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL);
-
-	qsufsort(I,V,old,oldsize);
+	if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL);
 
-	free(V);
+	if(divsufsort(old, I, oldsize)) err(1, "divsufsort");
 
 	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
 		that we never try to malloc(0) and get a NULL pointer */

Copied: stable/11/usr.bin/bsdiff/bsdiff/config.h (from r303285, head/usr.bin/bsdiff/bsdiff/config.h)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/11/usr.bin/bsdiff/bsdiff/config.h	Sun Dec 11 06:08:01 2016	(r309846, copy of r303285, head/usr.bin/bsdiff/bsdiff/config.h)
@@ -0,0 +1,83 @@
+/*
+ * config.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** Define to the version of this package. **/
+#define PROJECT_VERSION_FULL "2.0.1-14-g5f60d6f"
+
+/** Define to 1 if you have the header files. **/
+#define HAVE_INTTYPES_H 1
+#define HAVE_STDDEF_H 1
+#define HAVE_STDINT_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRING_H 1
+#define HAVE_STRINGS_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_SYS_TYPES_H 1
+
+/** for WinIO **/
+/* #undef HAVE_IO_H */
+/* #undef HAVE_FCNTL_H */
+/* #undef HAVE__SETMODE */
+/* #undef HAVE_SETMODE */
+/* #undef HAVE__FILENO */
+/* #undef HAVE_FOPEN_S */
+/* #undef HAVE__O_BINARY */
+#ifndef HAVE__SETMODE
+# if HAVE_SETMODE
+#  define _setmode setmode
+#  define HAVE__SETMODE 1
+# endif
+# if HAVE__SETMODE && !HAVE__O_BINARY
+#  define _O_BINARY 0
+#  define HAVE__O_BINARY 1
+# endif
+#endif
+
+/** for inline **/
+#ifndef INLINE
+# define INLINE inline
+#endif
+
+/** for VC++ warning **/
+#ifdef _MSC_VER
+#pragma warning(disable: 4127)
+#endif
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _CONFIG_H */

Copied: stable/11/usr.bin/bsdiff/bsdiff/divsufsort64.h (from r303285, head/usr.bin/bsdiff/bsdiff/divsufsort64.h)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/11/usr.bin/bsdiff/bsdiff/divsufsort64.h	Sun Dec 11 06:08:01 2016	(r309846, copy of r303285, head/usr.bin/bsdiff/bsdiff/divsufsort64.h)
@@ -0,0 +1,180 @@
+/*
+ * divsufsort64.h for libdivsufsort64
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT64_H
+#define _DIVSUFSORT64_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <inttypes.h>
+
+#ifndef DIVSUFSORT_API
+# ifdef DIVSUFSORT_BUILD_DLL
+#  define DIVSUFSORT_API 
+# else
+#  define DIVSUFSORT_API 
+# endif
+#endif
+
+/*- Datatypes -*/
+#ifndef SAUCHAR_T
+#define SAUCHAR_T
+typedef uint8_t sauchar_t;
+#endif /* SAUCHAR_T */
+#ifndef SAINT_T
+#define SAINT_T
+typedef int32_t saint_t;
+#endif /* SAINT_T */
+#ifndef SAIDX64_T
+#define SAIDX64_T
+typedef int64_t saidx64_t;
+#endif /* SAIDX64_T */
+#ifndef PRIdSAINT_T
+#define PRIdSAINT_T PRId32
+#endif /* PRIdSAINT_T */
+#ifndef PRIdSAIDX64_T
+#define PRIdSAIDX64_T PRId64
+#endif /* PRIdSAIDX64_T */
+
+
+/*- Prototypes -*/
+
+/**
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+divsufsort64(const sauchar_t *T, saidx64_t *SA, saidx64_t n);
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saidx64_t
+divbwt64(const sauchar_t *T, sauchar_t *U, saidx64_t *A, saidx64_t n);
+
+/**
+ * Returns the version of the divsufsort library.
+ * @return The version number string.
+ */
+DIVSUFSORT_API
+const char *
+divsufsort64_version(void);
+
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string and suffix array.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param SA[0..n-1] The suffix array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The output primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+bw_transform64(const sauchar_t *T, sauchar_t *U,
+             saidx64_t *SA /* can NULL */,
+             saidx64_t n, saidx64_t *idx);
+
+/**
+ * Inverse BW-transforms a given BWTed string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+inverse_bw_transform64(const sauchar_t *T, sauchar_t *U,
+                     saidx64_t *A /* can NULL */,
+                     saidx64_t n, saidx64_t idx);
+
+/**
+ * Checks the correctness of a given suffix array.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The input suffix array.
+ * @param n The length of the given string.
+ * @param verbose The verbose mode.
+ * @return 0 if no error occurred.
+ */
+DIVSUFSORT_API
+saint_t
+sufcheck64(const sauchar_t *T, const saidx64_t *SA, saidx64_t n, saint_t verbose);
+
+/**
+ * Search for the pattern P in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param P[0..Psize-1] The input pattern string.
+ * @param Psize The length of the given pattern string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx64_t
+sa_search64(const sauchar_t *T, saidx64_t Tsize,
+          const sauchar_t *P, saidx64_t Psize,
+          const saidx64_t *SA, saidx64_t SAsize,
+          saidx64_t *left);
+
+/**
+ * Search for the character c in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param c The input character.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx64_t
+sa_simplesearch64(const sauchar_t *T, saidx64_t Tsize,
+                const saidx64_t *SA, saidx64_t SAsize,
+                saint_t c, saidx64_t *left);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT64_H */


More information about the svn-src-stable-11 mailing list