PERFORCE change 145103 for review

Gabor Kovesdan gabor at FreeBSD.org
Sat Jul 12 16:03:47 UTC 2008


http://perforce.freebsd.org/chv.cgi?CH=145103

Change 145103 by gabor at gabor_server on 2008/07/12 16:03:15

	- First read the first chunk of the file into a buffer and use a
	  wrapper function that returns a line from this buffer while
	  it still has data, and reads from the file itself once the
	  buffer is exhausted.  The content of the buffer will be used
	  for the binary check.  This way we no longer need to read a
	  chunk of the file and then seek back to the beginning, which
	  does not work for bzip2 files, nor do we need to pre-extract
	  compressed files to the tmp directory.

Affected files ...

.. //depot/projects/soc2008/gabor_textproc/grep/file.c#15 edit
.. //depot/projects/soc2008/gabor_textproc/grep/grep.h#36 edit
.. //depot/projects/soc2008/gabor_textproc/grep/util.c#58 edit

Differences ...

==== //depot/projects/soc2008/gabor_textproc/grep/file.c#15 (text+ko) ====

@@ -51,37 +51,61 @@
 #include "grep.h"
 
 static char	 fname[MAXPATHLEN];
+static char	*lnbuf;
+static size_t	 lnbuflen;
+static char	 binbuf[BUFSIZ * 4];
+static int	 binbufsiz;
+char		*binbufptr;
 
 #define iswbinary(ch)	(!iswspace((ch)) && iswcntrl((ch)))
 
-int
-bin_file(struct file *f)
+char *
+grep_fgetln(struct file *f, size_t *len)
 {
-	wint_t	 ch = L'\0';
-	size_t	 i;
-	int	 ret = 0;
+	int	 i;
+	char	 ch;
+	size_t	 size;
+	wchar_t	 wbinbuf[BUFSIZ];
 
-	if (f->noseek)
-		return (0);
+	if ((binbufptr == NULL) && (binbehave != BINFILE_TEXT)) {
+		for (i = 0; i < (BUFSIZ * 4); i++) {
+			if (feof(f->f))
+				break;
+			ch = fgetc(f->f);
+			binbuf[i] = ch;
+		}
+		binbufsiz = i;
+		binbufptr = binbuf;
+//		size = mbsnrtowcs(wbinbuf, &binbuf, BUFSIZ * 4, BUFSIZ - 1, NULL);
+		f->binary = 0;
+//		for (; size > 0; size--)
+//			if (iswbinary(wbinbuf[size])) {
+//				f->binary = 1;
+//				break;
+//			}
+	}
 
-	if (fseek(f->f, 0L, SEEK_SET) == -1)
-		return (0);
-
-	errno = 0;
-	for (i = 0; i <= BUFSIZ; i++) {
-		if ((ch = fgetwc(f->f)) == WEOF) {
-			if (errno == EILSEQ)
-				ret = 1;
-			break;
+	for (i = 0; ; i++) {
+		if (binbufptr == &binbuf[binbufsiz]) {
+			if (feof(f->f))
+				break;
+			ch = fgetc(f->f);
+		} else {
+			ch = binbufptr[0];
+			binbufptr++;
+		}
+		if (i >= lnbuflen) {
+			lnbuflen *= 2;
+			lnbuf = grep_realloc(lnbuf, ++lnbuflen);
 		}
-		if (iswbinary(ch)) {
-			ret = 1;
+		if (ch == '\n')
 			break;
-		}
+		lnbuf[i] = ch;
 	}
-
-	rewind(f->f);
-	return (ret);
+	if (feof(f->f) && (i == 0))
+		return NULL;
+	*len = i;
+	return (lnbuf);
 }
 
 struct file *
@@ -93,7 +117,6 @@
 
 	f = grep_malloc(sizeof *f);
 
-	f->noseek = isatty(STDIN_FILENO);
 	if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL)
 		return (f);
 
@@ -111,7 +134,6 @@
 	snprintf(fname, sizeof fname, "%s", path);
 
 	f = grep_malloc(sizeof *f);
-	f->noseek = 0;
 
 	if (Zflag || Jflag) {
 		templ = grep_malloc(sizeof(char) * 15);
@@ -140,7 +162,6 @@
 			char	 buf[BUFSIZ];
 			int	 bzerror;
 			FILE	*file;
-
 			if ((file = fopen(fname, "r")) == NULL)
 				err(2, NULL);
 			if ((bzf = BZ2_bzReadOpen(&bzerror, file, 0, 0, NULL, 0)) == NULL)

==== //depot/projects/soc2008/gabor_textproc/grep/grep.h#36 (text+ko) ====

@@ -69,7 +69,7 @@
 #define MAX_LINE_MATCHES	32
 
 struct file {
-	int		 noseek;
+	int		 binary;
 	FILE		*f;
 	struct mmfile	*mmf;
 };
@@ -117,6 +117,6 @@
 void	 clearqueue(void);
 
 /* file.c */
-int		 bin_file(struct file * f);
 struct file	*grep_stdin_open(void);
 struct file	*grep_open(char *path);
+char		*grep_fgetln(struct file *f, size_t *len);

==== //depot/projects/soc2008/gabor_textproc/grep/util.c#58 (text+ko) ====

@@ -129,7 +129,7 @@
 	struct file	*f;
 	struct stat	 sb;
 	mode_t		 s;
-	int		 c, t, nottext;
+	int		 c, t;
 
 	if (mflag && (mcount <= 0))
 		return (0);
@@ -159,8 +159,7 @@
 		return (0);
 	}
 
-	nottext = bin_file(f);
-	if (nottext && binbehave == BINFILE_SKIP) {
+	if (f->binary && binbehave == BINFILE_SKIP) {
 		fclose(f->f);
 		free(f);
 		return (0);
@@ -177,13 +176,13 @@
 		initqueue();
 	for (c = 0;  c == 0 || !(lflag || qflag); ) {
 		ln.off += ln.len + 1;
-		if ((ln.dat = fgetln(f->f, &ln.len)) == NULL)
+		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
 			break;
 		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
 			--ln.len;
 		ln.line_no++;
 
-		if ((t = procline(&ln, nottext)) == 0 && Bflag > 0) {
+		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
 			enqueue(&ln);
 			linesqueued++;
 		}
@@ -210,7 +209,7 @@
 	if (Lflag && c == 0)
 		printf("%s\n", fn);
 	if (c && !cflag && !lflag && !Lflag &&
-	    binbehave == BINFILE_BIN && nottext && !qflag)
+	    binbehave == BINFILE_BIN && f->binary && !qflag)
 		printf(getstr(12), fn);
 
 	return (c);


More information about the p4-projects mailing list