svn commit: r224939 - in user/gabor/tre-integration: contrib/tre/lib include lib/libc/regex

Gabor Kovesdan gabor at FreeBSD.org
Wed Aug 17 14:08:02 UTC 2011


Author: gabor
Date: Wed Aug 17 14:08:02 2011
New Revision: 224939
URL: http://svn.freebsd.org/changeset/base/224939

Log:
  - Introduce a public interface for fast matching

Added:
  user/gabor/tre-integration/contrib/tre/lib/fastmatch.c   (contents, props changed)
  user/gabor/tre-integration/include/fastmatch.h   (contents, props changed)
  user/gabor/tre-integration/include/hashtable.h
     - copied unchanged from r223646, user/gabor/tre-integration/contrib/tre/lib/hashtable.h
Deleted:
  user/gabor/tre-integration/contrib/tre/lib/hashtable.h
Modified:
  user/gabor/tre-integration/contrib/tre/lib/hashtable.c
  user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
  user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h
  user/gabor/tre-integration/include/Makefile
  user/gabor/tre-integration/lib/libc/regex/Makefile.inc

Added: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c	Wed Aug 17 14:08:02 2011	(r224939)
@@ -0,0 +1,185 @@
+/*-
+ * Copyright (C) 2011 Gabor Kovesdan <gabor at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <fastmatch.h>
+#include <string.h>
+
+#include "tre-fastmatch.h"
+#include "tre-internal.h"
+#include "xmalloc.h"
+
+/*
+ * CONV_PAT is meant to be expanded at the very top of a function body that
+ * has a `const char *regex' and a `size_t n' in scope.  It declares `ret',
+ * `wregex' and `wlen', allocates room for n + 1 wide characters and
+ * converts the multibyte pattern into that buffer with tre_mbrtowc().
+ * The buffer is NUL-terminated and `wlen' is set to the number of wide
+ * characters that were stored.
+ *
+ * The expansion contains hidden returns from the enclosing function:
+ * REG_ESPACE when the allocation fails, and REG_BADPAT (after freeing the
+ * buffer) when the conversion reports an error or returns 0 for a byte
+ * that is not a NUL.  On success the caller is left owning `wregex' and
+ * has to xfree() it.
+ *
+ * Side effects: the conversion loop advances `regex' and decrements `n'
+ * in place and only stops once `n' is no longer positive, so after the
+ * expansion `n' does not hold the pattern length any more; use `wlen'.
+ *
+ * XXX: clean up
+ */
+#define CONV_PAT							\
+  int ret;								\
+  tre_char_t *wregex;							\
+  size_t wlen;								\
+									\
+  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));			\
+  if (wregex == NULL)							\
+    return REG_ESPACE;							\
+  else									\
+    {									\
+      int consumed;							\
+      tre_char_t *wcptr = wregex;					\
+      mbstate_t state;							\
+      memset(&state, '\0', sizeof(state));				\
+      while (n > 0)							\
+        {								\
+          consumed = tre_mbrtowc(wcptr, regex, n, &state);		\
+									\
+          switch (consumed)						\
+            {								\
+            case 0:							\
+              if (*regex == '\0')					\
+                consumed = 1;						\
+              else							\
+                {							\
+                  xfree(wregex);					\
+                  return REG_BADPAT;					\
+                }							\
+              break;							\
+            case -1:							\
+              DPRINT(("mbrtowc: error %d: %s.\n", errno,		\
+		strerror(errno)));					\
+              xfree(wregex);						\
+              return REG_BADPAT;					\
+            case -2:							\
+              consumed = n;						\
+              break;							\
+            }								\
+          regex += consumed;						\
+          n -= consumed;						\
+          wcptr++;							\
+        }								\
+      wlen = wcptr - wregex;						\
+    }									\
+									\
+  wregex[wlen] = L'\0';
+
+/*
+ * Compiles the multibyte pattern `regex', given as `n' bytes, as a literal
+ * pattern into `preg'.
+ *
+ * The pattern is first converted to wide characters by CONV_PAT; the
+ * function returns REG_ESPACE or REG_BADPAT when that conversion fails and
+ * the result of tre_compile_literal() otherwise.  The temporary wide
+ * buffer is always released before returning.
+ */
+int
+tre_fixncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
+{
+  CONV_PAT;
+
+  /* CONV_PAT has counted `n' down while converting, so the length handed
+     to the compiler has to be the converted length `wlen', not `n'. */
+  ret = tre_compile_literal(preg, wregex, wlen, cflags);
+  xfree(wregex);
+
+  return ret;
+}
+
+/*
+ * Compiles the multibyte pattern `regex', given as `n' bytes, into `preg'.
+ * With REG_LITERAL set in `cflags' the literal compiler is used, otherwise
+ * the fast compiler.
+ *
+ * The pattern is first converted to wide characters by CONV_PAT; the
+ * function returns REG_ESPACE or REG_BADPAT when that conversion fails and
+ * the result of the chosen compiler otherwise.  The temporary wide buffer
+ * is always released before returning.
+ */
+int
+tre_fastncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
+{
+  CONV_PAT;
+
+  /* CONV_PAT has counted `n' down while converting, so the length handed
+     to the compiler has to be the converted length `wlen', not `n'. */
+  ret = (cflags & REG_LITERAL) ?
+    tre_compile_literal(preg, wregex, wlen, cflags) :
+    tre_compile_fast(preg, wregex, wlen, cflags);
+  xfree(wregex);
+
+  return ret;
+}
+
+
+/*
+ * Compiles the NUL-terminated multibyte pattern `regex' as a literal
+ * pattern into `preg'.  The pattern length is taken with strlen(); a NULL
+ * `regex' is treated as an empty pattern.
+ *
+ * Returns whatever tre_fixncomp() returns.
+ */
+int
+tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags)
+{
+  /* With a length of 0 tre_fixncomp() would convert nothing and compile
+     an empty pattern, so the real length has to be passed along. */
+  size_t len = (regex != NULL) ? strlen(regex) : 0;
+
+  return tre_fixncomp(preg, regex, len, cflags);
+}
+
+/*
+ * Compiles the NUL-terminated multibyte pattern `regex' into `preg'.  The
+ * pattern length is taken with strlen(); a NULL `regex' is treated as an
+ * empty pattern.
+ *
+ * Returns whatever tre_fastncomp() returns.
+ */
+int
+tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags)
+{
+  /* With a length of 0 tre_fastncomp() would convert nothing and compile
+     an empty pattern, so the real length has to be passed along. */
+  size_t len = (regex != NULL) ? strlen(regex) : 0;
+
+  return tre_fastncomp(preg, regex, len, cflags);
+}
+
+/*
+ * Wide-character counterpart of tre_fixncomp(): hands `regex' and the
+ * length `n' straight to tre_compile_literal() without any conversion and
+ * returns its result.
+ */
+int
+tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
+{
+  return tre_compile_literal(preg, regex, n, cflags);
+}
+
+/*
+ * Wide-character counterpart of tre_fastncomp().  No conversion is needed,
+ * so `regex' and `n' go directly to the literal compiler when REG_LITERAL
+ * is set in `cflags' and to the fast compiler otherwise.  The compiler's
+ * return value is passed back unchanged.
+ */
+int
+tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
+{
+  if (cflags & REG_LITERAL)
+    return tre_compile_literal(preg, regex, n, cflags);
+
+  return tre_compile_fast(preg, regex, n, cflags);
+}
+
+/*
+ * Compiles the NUL-terminated wide-character pattern `regex' as a literal
+ * pattern into `preg'.  The pattern length is taken with wcslen(); a NULL
+ * `regex' is passed on with length 0.
+ *
+ * Returns whatever tre_fixwncomp() returns.
+ */
+int
+tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
+{
+  /* Pass the real length instead of 0 so that the compiler is not asked
+     to compile an empty pattern.
+     NOTE(review): confirm tre_compile_literal() has no special meaning
+     for a length of 0. */
+  size_t len = (regex != NULL) ? wcslen(regex) : 0;
+
+  return tre_fixwncomp(preg, regex, len, cflags);
+}
+
+/*
+ * Compiles the NUL-terminated wide-character pattern `regex' into `preg'.
+ * The pattern length is taken with wcslen(); a NULL `regex' is passed on
+ * with length 0.
+ *
+ * Returns whatever tre_fastwncomp() returns.
+ */
+int
+tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
+{
+  /* Pass the real length instead of 0 so that the compiler is not asked
+     to compile an empty pattern.
+     NOTE(review): confirm the compile functions have no special meaning
+     for a length of 0. */
+  size_t len = (regex != NULL) ? wcslen(regex) : 0;
+
+  return tre_fastwncomp(preg, regex, len, cflags);
+}
+
+/*
+ * Public entry point for disposing of a compiled pattern; simply forwards
+ * `preg' to tre_free_fast().
+ */
+void
+tre_fastfree(fastmatch_t *preg)
+{
+  tre_free_fast(preg);
+}
+
+/*
+ * Matches the multibyte `string' of length `n' against the compiled
+ * pattern `preg' by calling tre_match_fast().  The string is treated as
+ * STR_BYTE when TRE_MB_CUR_MAX is 1 and as STR_MBS otherwise.  `nmatch',
+ * `pmatch' and `eflags' are forwarded unchanged and the matcher's return
+ * value is passed back.
+ */
+int
+tre_fastnexec(const fastmatch_t *preg, const char *string, size_t n,
+         size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  tre_str_type_t type;
+
+  if (TRE_MB_CUR_MAX == 1)
+    type = STR_BYTE;
+  else
+    type = STR_MBS;
+
+  return tre_match_fast(preg, string, n, type, nmatch, pmatch, eflags);
+}
+
+/*
+ * Variant of tre_fastnexec() without a length argument: forwards to
+ * tre_fastnexec() with (size_t)-1 as the length.
+ * NOTE(review): (size_t)-1 presumably tells the matcher that `string' is
+ * NUL-terminated -- confirm in tre_match_fast().
+ */
+int
+tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
+	     regmatch_t pmatch[], int eflags)
+{
+  return tre_fastnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
+}
+
+/*
+ * Matches the wide-character `string' of length `n' against the compiled
+ * pattern `preg' by calling tre_match_fast() with the STR_WIDE string
+ * type.  The remaining arguments are forwarded unchanged and the matcher's
+ * return value is passed back.
+ */
+int
+tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t n,
+          size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  return tre_match_fast(preg, string, n, STR_WIDE, nmatch,
+    pmatch, eflags);
+}
+
+/*
+ * Variant of tre_fastwnexec() without a length argument: forwards to
+ * tre_fastwnexec() with (size_t)-1 as the length.
+ * NOTE(review): (size_t)-1 presumably tells the matcher that `string' is
+ * NUL-terminated -- confirm in tre_match_fast().
+ */
+int
+tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
+         size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  return tre_fastwnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
+}
+

Modified: user/gabor/tre-integration/contrib/tre/lib/hashtable.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/hashtable.c	Wed Aug 17 13:58:39 2011	(r224938)
+++ user/gabor/tre-integration/contrib/tre/lib/hashtable.c	Wed Aug 17 14:08:02 2011	(r224939)
@@ -25,11 +25,10 @@
  */
 
 #include <sys/hash.h>
+#include <hashtable.h>
 #include <stdlib.h>
 #include <string.h>
 
-#include "hashtable.h"
-
 hashtable
 *hashtable_init(size_t table_size, size_t key_size, size_t value_size)
 {

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c	Wed Aug 17 13:58:39 2011	(r224938)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c	Wed Aug 17 14:08:02 2011	(r224939)
@@ -28,6 +28,7 @@
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif /* HAVE_CONFIG_H */
+#include <hashtable.h>
 #include <limits.h>
 #include <regex.h>
 #include <stdbool.h>

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h	Wed Aug 17 13:58:39 2011	(r224938)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h	Wed Aug 17 14:08:02 2011	(r224939)
@@ -28,6 +28,8 @@
 #ifndef TRE_FASTMATCH_H
 #define TRE_FASTMATCH_H 1
 
+#include <fastmatch.h>
+#include <hashtable.h>
 #include <limits.h>
 #include <regex.h>
 #include <stdbool.h>
@@ -35,25 +37,6 @@
 #include "hashtable.h"
 #include "tre-internal.h"
 
-typedef struct {
-  size_t wlen;
-  size_t len;
-  tre_char_t *wpattern;
-  int hasdot;
-  int qsBc[UCHAR_MAX + 1];
-  int *bmGs;
-  char *pattern;
-  int defBc;
-  hashtable *qsBc_table;
-  int *sbmGs;
-  /* flags */
-  bool bol;
-  bool eol;
-  bool word;
-  bool icase;
-  bool newline;
-} fastmatch_t;
-
 int	tre_compile_literal(fastmatch_t *preg, const tre_char_t *regex,
 	    size_t, int);
 int	tre_compile_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int);

Modified: user/gabor/tre-integration/include/Makefile
==============================================================================
--- user/gabor/tre-integration/include/Makefile	Wed Aug 17 13:58:39 2011	(r224938)
+++ user/gabor/tre-integration/include/Makefile	Wed Aug 17 14:08:02 2011	(r224939)
@@ -9,9 +9,9 @@ CLEANFILES= osreldate.h version vers.c
 SUBDIR= arpa gssapi protocols rpcsvc rpc
 INCS=	a.out.h ar.h assert.h bitstring.h complex.h cpio.h _ctype.h ctype.h \
 	db.h \
-	dirent.h dlfcn.h elf.h elf-hints.h err.h fmtmsg.h fnmatch.h fstab.h \
-	fts.h ftw.h getopt.h glob.h grp.h gssapi.h \
-	ieeefp.h ifaddrs.h \
+	dirent.h dlfcn.h elf.h elf-hints.h err.h fastmatch.h fmtmsg.h fnmatch.h \
+	fstab.h fts.h ftw.h getopt.h glob.h grp.h gssapi.h \
+	hashtable.h ieeefp.h ifaddrs.h \
 	inttypes.h iso646.h kenv.h langinfo.h libgen.h limits.h link.h \
 	locale.h malloc.h malloc_np.h memory.h monetary.h mpool.h mqueue.h \
 	ndbm.h netconfig.h \

Added: user/gabor/tre-integration/include/fastmatch.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/gabor/tre-integration/include/fastmatch.h	Wed Aug 17 14:08:02 2011	(r224939)
@@ -0,0 +1,101 @@
+/*-
+ * Copyright (C) 2011 Gabor Kovesdan <gabor at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef FASTMATCH_H
+#define FASTMATCH_H 1
+
+#include <hashtable.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <wchar.h>
+
+/*
+ * Compiled pattern object of the fast matching interface.  A pointer to it
+ * is taken by every tre_fix*comp(), tre_fast*comp(), tre_fast*exec()
+ * function declared below and by tre_fastfree().
+ *
+ * NOTE(review): the meaning of the individual fields is not documented
+ * here.  Judging by the names, `pattern'/`len' and `wpattern'/`wlen' look
+ * like the byte and wide-character forms of the pattern, and the
+ * Bc/Gs members like matcher shift tables -- confirm against
+ * tre-fastmatch.c before relying on any of them.
+ */
+typedef struct {
+  size_t	 wlen;
+  size_t	 len;
+  wchar_t	*wpattern;
+  int		 hasdot;
+  int		 qsBc[UCHAR_MAX + 1];
+  int		*bmGs;
+  char		*pattern;
+  int		 defBc;
+  hashtable	*qsBc_table;
+  int		*sbmGs;
+
+  /* flags */
+  bool		 bol;
+  bool		 eol;
+  bool		 word;
+  bool		 icase;
+  bool		 newline;
+} fastmatch_t;
+
+/*
+ * Interface for multibyte strings without an explicit length.
+ * tre_fixcomp() always compiles through the literal compiler, whereas
+ * tre_fastcomp() uses the literal compiler only when REG_LITERAL is set in
+ * cflags and the fast compiler otherwise.
+ */
+extern int
+tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags);
+
+extern int
+tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags);
+
+/* Matches string against a compiled pattern. */
+extern int
+tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
+  regmatch_t pmatch[], int eflags);
+
+/* Disposes of a compiled pattern. */
+extern void
+tre_fastfree(fastmatch_t *preg);
+
+/* Wide-character counterparts of the functions above. */
+extern int
+tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
+
+extern int
+tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
+
+extern int
+tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
+         size_t nmatch, regmatch_t pmatch[], int eflags);
+
+/* Versions with a maximum length argument and therefore the capability to
+   handle null characters in the middle of the strings. */
+extern int
+tre_fixncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
+
+extern int
+tre_fastncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
+
+extern int
+tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
+  size_t nmatch, regmatch_t pmatch[], int eflags);
+
+/* Wide-character versions with a length argument; len is forwarded
+   unchanged to the underlying compile and match functions. */
+extern int
+tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
+
+extern int
+tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
+
+extern int
+tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
+  size_t nmatch, regmatch_t pmatch[], int eflags);
+
+#endif		/* FASTMATCH_H */

Copied: user/gabor/tre-integration/include/hashtable.h (from r223646, user/gabor/tre-integration/contrib/tre/lib/hashtable.h)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/gabor/tre-integration/include/hashtable.h	Wed Aug 17 14:08:02 2011	(r224939, copy of r223646, user/gabor/tre-integration/contrib/tre/lib/hashtable.h)
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (C) 2011 Gabor Kovesdan <gabor at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef HASHTABLE_H
+#define HASHTABLE_H 1
+
+#include <sys/types.h>
+
+/*
+ * One key/value pair of a hashtable.
+ * NOTE(review): presumably key and value point to storage of key_size and
+ * value_size bytes respectively -- confirm in hashtable.c.
+ */
+typedef struct {
+	void		*key;
+	void		*value;
+} hashtable_entry;
+
+/*
+ * Hash table handle as returned by hashtable_init().  It records the key,
+ * value and table sizes together with an array of entry pointers.
+ * NOTE(review): usage presumably counts the occupied slots of entries --
+ * confirm in hashtable.c.
+ */
+typedef struct {
+	size_t		 key_size;
+	size_t		 table_size;
+	size_t		 usage;
+	size_t		 value_size;
+	hashtable_entry **entries;
+} hashtable;
+
+/*
+ * Hash table operations, implemented in hashtable.c.
+ * hashtable_init() takes its arguments in the order
+ * (table_size, key_size, value_size).
+ * NOTE(review): the meaning of the int return values and the direction of
+ * the void pointer arguments (key in, value in/out) are not visible from
+ * this header -- confirm in hashtable.c.
+ */
+void		 hashtable_free(hashtable *);
+int		 hashtable_get(hashtable *, const void *, void *);
+hashtable	*hashtable_init(size_t, size_t, size_t);
+int		 hashtable_put(hashtable *, const void *, const void *);
+int		 hashtable_remove(hashtable *, const void *);
+
+#endif	/* HASHTABLE_H */

Modified: user/gabor/tre-integration/lib/libc/regex/Makefile.inc
==============================================================================
--- user/gabor/tre-integration/lib/libc/regex/Makefile.inc	Wed Aug 17 13:58:39 2011	(r224938)
+++ user/gabor/tre-integration/lib/libc/regex/Makefile.inc	Wed Aug 17 14:08:02 2011	(r224939)
@@ -5,9 +5,10 @@
 
 CFLAGS+=-DHAVE_CONFIG_H -DTRE_LIBC_BUILD -I${.CURDIR}/../../contrib/tre
 
-SRCS+=	hashtable.c regcomp.c regerror.c regexec.c tre-ast.c tre-compile.c \
-	tre-fastmatch.c tre-match-approx.c tre-match-backtrack.c \
-	tre-match-parallel.c tre-mem.c tre-parse.c tre-stack.c xmalloc.c
+SRCS+=	fastmatch.c hashtable.c regcomp.c regerror.c regexec.c tre-ast.c \
+	tre-compile.c tre-fastmatch.c tre-match-approx.c \
+	tre-match-backtrack.c tre-match-parallel.c tre-mem.c tre-parse.c \
+	tre-stack.c xmalloc.c
 
 MAN+=	regex.3 re_format.7
 


More information about the svn-src-user mailing list