bin/76578: uniq truncates long lines to LINE_MAX (2048)

Scot W. Hetzel zxycba2004 at yahoo.com
Sat Jan 22 13:20:39 PST 2005


>Number:         76578
>Category:       bin
>Synopsis:       uniq truncates long lines to LINE_MAX
>Confidential:   no
>Severity:       serious
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Sat Jan 22 21:20:37 GMT 2005
>Closed-Date:
>Last-Modified:
>Originator:     Scot W. Hetzel
>Release:        FreeBSD 5.3-STABLE i386
>Organization:
>Environment:
System: FreeBSD bsd5x.hetzel.org 5.3-STABLE FreeBSD
5.3-STABLE #6: Sun Jan 16 03:44:45 CST 2005
root at bsd5x.hetzel.org:/usr/obj/usr/src/5x/sys/GENERIC
i386

>Description:
I noticed that if a file contains long lines (>2048 bytes), uniq(1)
truncates each such line to 2048 bytes.  The truncation is caused by
the getline routine in src/usr.bin/uniq/uniq.c: if it reaches buflen-2
characters without reaching the end of the line, it discards the
remainder of the line, reading until it gets a newline or WEOF.

I consider this a bug, as uniq(1) shouldn't be
discarding the end of a line.

If sort(1) is used on the same file, the line isn't
truncated.

>How-To-Repeat:
A simple way to repeat is to do the following:

  cd /usr/ports/accessibility/gnomemag
  make fetch-list > tmp.list
  make fetch-list >> tmp.list
  uniq tmp.list > tmp2.list
  ls -l tmp*list

  -rw-r--r-- 1 root wheel 4540 Jan 22 03:31 tmp.list
  -rw-r--r-- 1 root wheel 2048 Jan 22 03:32 tmp2.list

tmp2.list should be half the size of tmp.list, but
instead it's 2048 bytes.

NOTE: make sure you don't have the distfile for
gnomemag before using fetch-list

>Fix:
The following patch makes uniq no longer truncate
long lines.

NOTE: I haven't figured out how to pass the updated 'buflen' from
getline back to 'thisbuflen' or 'prevbuflen'.  As a result, getline
will realloc the line buffer again whenever a line is longer than
LINE_MAX, instead of only when it is longer than the new
buflen/sizeof(*thisline).

Index: uniq.c
===================================================================
RCS file: /home/ncvs/src/usr.bin/uniq/uniq.c,v
retrieving revision 1.25
diff -u -r1.25 uniq.c
--- uniq.c	2 Jul 2004 23:43:05 -0000	1.25
+++ uniq.c	22 Jan 2005 09:14:38 -0000
@@ -50,6 +50,7 @@
 
 #include <ctype.h>
 #include <err.h>
+#include <errno.h>
 #include <limits.h>
 #include <locale.h>
 #include <stdio.h>
@@ -71,13 +72,15 @@
 void	 obsolete(char *[]);
 static void	 usage(void);
 int      wcsicoll(wchar_t *, wchar_t *);
+static wchar_t *__wcsalloc(wchar_t *);
 
 int
 main (int argc, char *argv[])
 {
 	wchar_t *t1, *t2;
 	FILE *ifp, *ofp;
-	int ch;
+	int ch, b1;
+	size_t prevbuflen, thisbuflen;
 	wchar_t *prevline, *thisline;
 	char *p;
 	const char *ifn;
@@ -136,20 +139,24 @@
 	if (argc > 1)
 		ofp = file(argv[1], "w");
 
-	prevline = malloc(MAXLINELEN * sizeof(*prevline));
-	thisline = malloc(MAXLINELEN * sizeof(*thisline));
+	prevbuflen = MAXLINELEN * sizeof(*prevline);
+	thisbuflen = MAXLINELEN * sizeof(*thisline);
+	prevline = malloc(prevbuflen);
+	thisline = malloc(thisbuflen);
+
 	if (prevline == NULL || thisline == NULL)
 		err(1, "malloc");
 
-	if (getline(prevline, MAXLINELEN, ifp) == NULL) {
+	if (getline(prevline, prevbuflen, ifp) == NULL) {
 		if (ferror(ifp))
 			err(1, "%s", ifp == stdin ? "stdin" : argv[0]);
 		exit(0);
 	}
+
 	if (!cflag && uflag && dflag)
 		show(ofp, prevline);
 
-	while (getline(thisline, MAXLINELEN, ifp)) {
+	while (getline(thisline, thisbuflen, ifp)) {
 		/* If requested get the chosen fields + character offsets. */
 		if (numfields || numchars) {
 			t1 = skip(thisline);
@@ -169,10 +176,13 @@
 			if (cflag || !dflag || !uflag)
 				show(ofp, prevline);
 			t1 = prevline;
+			b1 = prevbuflen;
 			prevline = thisline;
+			prevbuflen = thisbuflen;
 			if (!cflag && uflag && dflag)
 				show(ofp, prevline);
 			thisline = t1;
+			thisbuflen = b1;
 			repeats = 0;
 		} else
 			++repeats;
@@ -191,12 +201,15 @@
 	wint_t ch;
 
 	bufpos = 0;
-	while (bufpos + 2 != buflen && (ch = getwc(fp)) != WEOF && ch != '\n')
+	while ((ch = getwc(fp)) != WEOF && ch != '\n') {
+		if ((bufpos + 1) == buflen) {
+			buflen = buflen + (1024 * sizeof(*buf));
+			buf = realloc(buf,buflen);
+		}
 		buf[bufpos++] = ch;
+	}
 	if (bufpos + 1 != buflen)
 		buf[bufpos] = '\0';
-	while (ch != WEOF && ch != '\n')
-		ch = getwc(fp);
 
 	return (bufpos != 0 || ch == '\n' ? buf : NULL);
 }
@@ -278,16 +291,42 @@
 	exit(1);
 }
 
+static wchar_t *
+__wcsalloc(wchar_t *ws)
+{
+	wchar_t *wcs;
+
+	if ((wcs = malloc(wcslen(ws) + 1)) == NULL)
+		return (NULL);
+
+	return (wcs);
+}
+
 int
 wcsicoll(wchar_t *s1, wchar_t *s2)
 {
-	wchar_t *p, line1[MAXLINELEN], line2[MAXLINELEN];
+	wchar_t *p, *l1, *l2;
+	int diff, sverrno;
+
+	if ((l1 = __wcsalloc(s1)) == NULL || (l2 = __wcsalloc(s2)) == NULL) {
+		sverrno = errno;
+		free(l1);
+		errno = sverrno;
+		return(wcscmp(s1,s2));
+	}
 
-	for (p = line1; *s1; s1++)
+	for (p = l1; *s1; s1++)
 		*p++ = towlower(*s1);
 	*p = '\0';
-	for (p = line2; *s2; s2++)
+	for (p = l2; *s2; s2++)
 		*p++ = towlower(*s2);
 	*p = '\0';
-	return (wcscoll(line1, line2));
+
+	diff = wcscoll(l1,l2);
+	sverrno = errno;
+	free(l1);
+	free(l2);
+	errno = sverrno;
+
+	return (diff);
 }



__________________________________________________
Do You Yahoo!?
Tired of spam?  Yahoo! Mail has the best spam protection around 
http://mail.yahoo.com 
>Release-Note:
>Audit-Trail:
>Unformatted:
 X-send-pr-version: 3.113
 X-GNATS-Notify: 
 
 
 (2048)


More information about the freebsd-bugs mailing list