svn commit: r211364 - head/usr.bin/grep

Gabor Kovesdan gabor at FreeBSD.org
Sun Aug 15 22:15:05 UTC 2010


Author: gabor
Date: Sun Aug 15 22:15:04 2010
New Revision: 211364
URL: http://svn.freebsd.org/changeset/base/211364

Log:
  - Revert the strlcpy() changes back to memcpy(): memcpy() is more
    efficient, and although strlcpy() may be safer in general, it does
    not add any extra safety in this case [1]
  - Fix -w option [2]
  - Fix handling of GREP_OPTIONS [3]
  - Fix --line-buffered
  - Make stdin input imply --line-buffered so that tail -f can be piped
    to grep [4]
  - Imply -h if a single file is grepped; this is the GNU behaviour
  - Reduce locking overhead to gain some more performance [5]
  - Inline some functions to help the compiler better optimize the code
  - Use shortcut for empty files [6]
  
  PR:		bin/149425 [6]
  Prodded by:	jilles [1]
  Reported by:	Alex Kozlov <spam at rm-rf.kiev.ua> [2] [3],
  		swell.k at gmail.com [2],
  		poyopoyo at puripuri.plala.or.jp [4]
  Submitted by:	scf [5],
  		Shuichi KITAGUCHI <ki at hh.iij4u.or.jp> [6]
  Approved by:	delphij (mentor)

Modified:
  head/usr.bin/grep/fastgrep.c
  head/usr.bin/grep/file.c
  head/usr.bin/grep/grep.c
  head/usr.bin/grep/grep.h
  head/usr.bin/grep/queue.c
  head/usr.bin/grep/util.c

Modified: head/usr.bin/grep/fastgrep.c
==============================================================================
--- head/usr.bin/grep/fastgrep.c	Sun Aug 15 22:09:43 2010	(r211363)
+++ head/usr.bin/grep/fastgrep.c	Sun Aug 15 22:15:04 2010	(r211364)
@@ -46,8 +46,8 @@ __FBSDID("$FreeBSD$");
 
 #include "grep.h"
 
-static int	grep_cmp(const unsigned char *, const unsigned char *, size_t);
-static void	grep_revstr(unsigned char *, int);
+static inline int	grep_cmp(const unsigned char *, const unsigned char *, size_t);
+static inline void	grep_revstr(unsigned char *, int);
 
 void
 fgrepcomp(fastgrep_t *fg, const char *pat)
@@ -273,7 +273,7 @@ grep_search(fastgrep_t *fg, unsigned cha
  * Returns:	i >= 0 on failure (position that it failed)
  *		-1 on success
  */
-static int
+static inline int
 grep_cmp(const unsigned char *pat, const unsigned char *data, size_t len)
 {
 	size_t size;
@@ -318,7 +318,7 @@ grep_cmp(const unsigned char *pat, const
 	return (-1);
 }
 
-static void
+static inline void
 grep_revstr(unsigned char *str, int len)
 {
 	int i;

Modified: head/usr.bin/grep/file.c
==============================================================================
--- head/usr.bin/grep/file.c	Sun Aug 15 22:09:43 2010	(r211363)
+++ head/usr.bin/grep/file.c	Sun Aug 15 22:15:04 2010	(r211364)
@@ -67,14 +67,14 @@ static int	 bzerr;
  * Returns a single character according to the file type.
  * Returns -1 on failure.
  */
-int
+static inline int
 grep_fgetc(struct file *f)
 {
 	unsigned char c;
 
 	switch (filebehave) {
 	case FILE_STDIO:
-		return (fgetc(f->f));
+		return (getc_unlocked(f->f));
 	case FILE_GZIP:
 		return (gzgetc(f->gzf));
 	case FILE_BZIP:
@@ -92,13 +92,13 @@ grep_fgetc(struct file *f)
  * Returns true if the file position is a EOF, returns false
  * otherwise.
  */
-int
+static inline int
 grep_feof(struct file *f)
 {
 
 	switch (filebehave) {
 	case FILE_STDIO:
-		return (feof(f->f));
+		return (feof_unlocked(f->f));
 	case FILE_GZIP:
 		return (gzeof(f->gzf));
 	case FILE_BZIP:
@@ -131,6 +131,9 @@ grep_fgetln(struct file *f, size_t *len)
 				st.st_size = MAXBUFSIZ;
 			else if (stat(fname, &st) != 0)
 				err(2, NULL);
+			/* no need to allocate buffer. */
+			if (st.st_size == 0)
+				return (NULL);
 
 			bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ?
 			    (st.st_size / 2) : MAXBUFSIZ;
@@ -142,6 +145,8 @@ grep_fgetln(struct file *f, size_t *len)
 				if (ch == EOF)
 					break;
 				binbuf[i++] = ch;
+				if ((ch == '\n') && lbflag)
+					break;
 			}
 
 			f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ?
@@ -184,11 +189,16 @@ grep_stdin_open(void)
 {
 	struct file *f;
 
+	/* Processing stdin implies --line-buffered for tail -f to work. */
+	lbflag = true;
+
 	snprintf(fname, sizeof fname, "%s", getstr(1));
 
 	f = grep_malloc(sizeof *f);
 
+	binbuf = NULL;
 	if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) {
+		flockfile(f->f);
 		f->stdin = true;
 		return (f);
 	}
@@ -209,11 +219,14 @@ grep_open(const char *path)
 
 	f = grep_malloc(sizeof *f);
 
+	binbuf = NULL;
 	f->stdin = false;
 	switch (filebehave) {
 	case FILE_STDIO:
-		if ((f->f = fopen(path, "r")) != NULL)
+		if ((f->f = fopen(path, "r")) != NULL) {
+			flockfile(f->f);
 			return (f);
+		}
 		break;
 	case FILE_GZIP:
 		if ((f->gzf = gzopen(fname, "r")) != NULL)
@@ -238,6 +251,7 @@ grep_close(struct file *f)
 
 	switch (filebehave) {
 	case FILE_STDIO:
+		funlockfile(f->f);
 		fclose(f->f);
 		break;
 	case FILE_GZIP:
@@ -251,5 +265,4 @@ grep_close(struct file *f)
 	/* Reset read buffer for the file we are closing */
 	binbufptr = NULL;
 	free(binbuf);
-
 }

Modified: head/usr.bin/grep/grep.c
==============================================================================
--- head/usr.bin/grep/grep.c	Sun Aug 15 22:09:43 2010	(r211363)
+++ head/usr.bin/grep/grep.c	Sun Aug 15 22:15:04 2010	(r211364)
@@ -121,8 +121,8 @@ int	 devbehave = DEV_READ;		/* -D: handl
 int	 dirbehave = DIR_READ;		/* -dRr: handling of directories */
 int	 linkbehave = LINK_READ;	/* -OpS: handling of symlinks */
 
-bool	 dexclude, dinclude;	/* --exclude amd --include */
-bool	 fexclude, finclude;	/* --exclude-dir and --include-dir */
+bool	 dexclude, dinclude;	/* --exclude-dir and --include-dir */
+bool	 fexclude, finclude;	/* --exclude and --include */
 
 enum {
 	BIN_OPT = CHAR_MAX + 1,
@@ -236,7 +236,8 @@ add_pattern(char *pat, size_t len)
 		--len;
 	/* pat may not be NUL-terminated */
 	pattern[patterns] = grep_malloc(len + 1);
-	strlcpy(pattern[patterns], pat, len + 1);
+	memcpy(pattern[patterns], pat, len);
+	pattern[patterns][len] = '\0';
 	++patterns;
 }
 
@@ -355,38 +356,33 @@ main(int argc, char *argv[])
 
 	eopts = getenv("GREP_OPTIONS");
 
-	eargc = 1;
+	/* support for extra arguments in GREP_OPTIONS */
+	eargc = 0;
 	if (eopts != NULL) {
 		char *str;
 
-		for(i = 0; i < strlen(eopts); i++)
-			if (eopts[i] == ' ')
+		/* make an estimation of how many extra arguments we have */
+		for (unsigned int j = 0; j < strlen(eopts); j++)
+			if (eopts[j] == ' ')
 				eargc++;
 
 		eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
 
-		str = strtok(eopts, " ");
 		eargc = 0;
-
-		while(str != NULL) {
-			eargv[++eargc] = (char *)grep_malloc(sizeof(char) *
-			    (strlen(str) + 1));
-			strlcpy(eargv[eargc], str, strlen(str) + 1);
-			str = strtok(NULL, " ");
-		}
-		eargv[++eargc] = NULL;
+		/* parse extra arguments */
+		while ((str = strsep(&eopts, " ")) != NULL)
+			eargv[eargc++] = grep_strdup(str);
 
 		aargv = (char **)grep_calloc(eargc + argc + 1,
 		    sizeof(char *));
-		aargv[0] = argv[0];
 
-		for(i = 1; i < eargc; i++)
-			aargv[i] = eargv[i];
-		for(int j = 1; j < argc; j++)
-			aargv[i++] = argv[j];
-
-		aargc = eargc + argc - 1;
+		aargv[0] = argv[0];
+		for (i = 0; i < eargc; i++)
+			aargv[i + 1] = eargv[i];
+		for (int j = 1; j < argc; j++, i++)
+			aargv[i + 1] = argv[j];
 
+		aargc = eargc + argc;
 	} else {
 		aargv = argv;
 		aargc = argc;
@@ -609,11 +605,11 @@ main(int argc, char *argv[])
 			add_fpattern(optarg, EXCL_PAT);
 			break;
 		case R_DINCLUDE_OPT:
-			dexclude = true;
+			dinclude = true;
 			add_dpattern(optarg, INCL_PAT);
 			break;
 		case R_DEXCLUDE_OPT:
-			dinclude = true;
+			dexclude = true;
 			add_dpattern(optarg, EXCL_PAT);
 			break;
 		case HELP_OPT:
@@ -685,12 +681,15 @@ main(int argc, char *argv[])
 
 	if (dirbehave == DIR_RECURSE)
 		c = grep_tree(aargv);
-	else 
+	else {
+		if (aargc == 1)
+			hflag = true;
 		for (c = 0; aargc--; ++aargv) {
 			if ((finclude || fexclude) && !file_matching(*aargv))
 				continue;
 			c+= procfile(*aargv);
 		}
+	}
 
 #ifndef WITHOUT_NLS
 	catclose(catalog);

Modified: head/usr.bin/grep/grep.h
==============================================================================
--- head/usr.bin/grep/grep.h	Sun Aug 15 22:09:43 2010	(r211363)
+++ head/usr.bin/grep/grep.h	Sun Aug 15 22:15:04 2010	(r211364)
@@ -115,7 +115,7 @@ extern int	 cflags, eflags;
 extern bool	 Eflag, Fflag, Gflag, Hflag, Lflag,
 		 bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag,
 		 qflag, sflag, vflag, wflag, xflag;
-extern bool	 dexclude, dinclude, fexclude, finclude, nullflag;
+extern bool	 dexclude, dinclude, fexclude, finclude, lbflag, nullflag;
 extern unsigned long long Aflag, Bflag, mcount;
 extern char	*label;
 extern const char *color;
@@ -134,7 +134,6 @@ extern fastgrep_t *fg_pattern;
 extern char	 re_error[RE_ERROR_BUF + 1];	/* Seems big enough */
 
 /* util.c */
-bool	 dir_matching(const char *dname);
 bool	 file_matching(const char *fname);
 int	 procfile(const char *fn);
 int	 grep_tree(char **argv);
@@ -153,8 +152,6 @@ void	 clearqueue(void);
 void		 grep_close(struct file *f);
 struct file	*grep_stdin_open(void);
 struct file	*grep_open(const char *path);
-int		 grep_feof(struct file *f);
-int		 grep_fgetc(struct file *f);
 char		*grep_fgetln(struct file *f, size_t *len);
 
 /* fastgrep.c */

Modified: head/usr.bin/grep/queue.c
==============================================================================
--- head/usr.bin/grep/queue.c	Sun Aug 15 22:09:43 2010	(r211363)
+++ head/usr.bin/grep/queue.c	Sun Aug 15 22:15:04 2010	(r211364)
@@ -60,7 +60,7 @@ enqueue(struct str *x)
 	item->data.len = x->len;
 	item->data.line_no = x->line_no;
 	item->data.off = x->off;
-	strcpy(item->data.dat, x->dat);
+	memcpy(item->data.dat, x->dat, x->len);
 	item->data.file = x->file;
 
 	STAILQ_INSERT_TAIL(&queue, item, list);

Modified: head/usr.bin/grep/util.c
==============================================================================
--- head/usr.bin/grep/util.c	Sun Aug 15 22:09:43 2010	(r211363)
+++ head/usr.bin/grep/util.c	Sun Aug 15 22:15:04 2010	(r211364)
@@ -72,7 +72,7 @@ file_matching(const char *fname)
 	return (ret);
 }
 
-bool
+static inline bool
 dir_matching(const char *dname)
 {
 	bool ret;
@@ -144,9 +144,10 @@ grep_tree(char **argv)
 			if (dexclude || dinclude) {
 				if ((d = strrchr(p->fts_path, '/')) != NULL) {
 					dir = grep_malloc(sizeof(char) *
-					    (d - p->fts_path + 2));
-					strlcpy(dir, p->fts_path,
 					    (d - p->fts_path + 1));
+					memcpy(dir, p->fts_path,
+					    d - p->fts_path);
+					dir[d - p->fts_path] = '\0';
 				}
 				ok = dir_matching(dir);
 				free(dir);
@@ -276,7 +277,7 @@ procfile(const char *fn)
  * matches.  The matching lines are passed to printline() to display the
  * appropriate output.
  */
-static int
+static inline int
 procline(struct str *l, int nottext)
 {
 	regmatch_t matches[MAX_LINE_MATCHES];
@@ -317,30 +318,20 @@ procline(struct str *l, int nottext)
 					    (size_t)pmatch.rm_eo != l->len)
 						r = REG_NOMATCH;
 				/* Check for whole word match */
-				if (r == 0 && wflag && pmatch.rm_so != 0 &&
-				    (size_t)pmatch.rm_eo != l->len) {
-					wchar_t *wbegin;
-					wint_t wend;
-					size_t size;
+				if (r == 0 && wflag && pmatch.rm_so != 0) {
+					wint_t wbegin, wend;
 
-					size = mbstowcs(NULL, l->dat,
-					    pmatch.rm_so);
-
-					if (size == ((size_t) - 1))
+					wbegin = wend = L' ';
+					if (pmatch.rm_so != 0 &&
+					    sscanf(&l->dat[pmatch.rm_so - 1],
+					    "%lc", &wbegin) != 1)
+						r = REG_NOMATCH;
+					else if ((size_t)pmatch.rm_eo != l->len &&
+					    sscanf(&l->dat[pmatch.rm_eo],
+					    "%lc", &wend) != 1)
+						r = REG_NOMATCH;
+					else if (iswword(wbegin) || iswword(wend))
 						r = REG_NOMATCH;
-					else {
-						wbegin = grep_malloc(size);
-						if (mbstowcs(wbegin, l->dat,
-						    pmatch.rm_so) == ((size_t) - 1))
-							r = REG_NOMATCH;
-						else if (sscanf(&l->dat[pmatch.rm_eo],
-						    "%lc", &wend) != 1)
-							r = REG_NOMATCH;
-						else if (iswword(wbegin[wcslen(wbegin)]) ||
-						    iswword(wend))
-							r = REG_NOMATCH;
-						free(wbegin);
-					}
 				}
 				if (r == 0) {
 					if (m == 0)


More information about the svn-src-all mailing list