bin/76578: uniq truncates long lines to LINE_MAX (2048)
Scot W. Hetzel
zxycba2004 at yahoo.com
Sat Jan 22 13:20:39 PST 2005
>Number: 76578
>Category: bin
>Synopsis: uniq truncates long lines to LINE_MAX
>Confidential: no
>Severity: serious
>Priority: low
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Sat Jan 22 21:20:37 GMT 2005
>Closed-Date:
>Last-Modified:
>Originator: Scot W. Hetzel
>Release: FreeBSD 5.3-STABLE i386
>Organization:
>Environment:
System: FreeBSD bsd5x.hetzel.org 5.3-STABLE FreeBSD
5.3-STABLE #6: Sun Jan 16 03:44:45 CST 2005
root at bsd5x.hetzel.org:/usr/obj/usr/src/5x/sys/GENERIC
i386
>Description:
I noticed that if you have a file with long lines
(>2048 bytes), uniq(1) will
truncate each such line to 2048 bytes. This truncation is
caused by the getline
routine in src/usr.bin/uniq/uniq.c: if it
reaches buflen-2 without having reached
the end of the line, it discards the remainder of the
line (everything up to the next newline or WEOF).
I consider this a bug, as uniq(1) shouldn't be
discarding the end of a line.
If sort(1) is used on the same file, the line isn't
truncated.
>How-To-Repeat:
A simple way to repeat is to do the following:
cd /usr/ports/accessibility/gnomemag
make fetch-list > tmp.list
make fetch-list >> tmp.list
uniq tmp.list > tmp2.list
ls -l tmp*list
-rw-r--r-- 1 root wheel 4540 Jan 22 03:31 tmp.list
-rw-r--r-- 1 root wheel 2048 Jan 22 03:32 tmp2.list
tmp2.list should be half the size of tmp.list, but
instead it's 2048 bytes.
NOTE: make sure you don't have the distfile for
gnomemag before using fetch-list
>Fix:
The following patch makes it so that uniq no longer
truncates long lines.
NOTE: I haven't figured out how to pass the updated 'buflen'
back from getline to 'thisbuflen' or 'prevbuflen'.
As a result, getline will realloc the line buffer
again whenever a line
is longer than LINE_MAX, instead of only when it is longer
than the new buflen/sizeof(*thisline).
Index: uniq.c
===================================================================
RCS file: /home/ncvs/src/usr.bin/uniq/uniq.c,v
retrieving revision 1.25
diff -u -r1.25 uniq.c
--- uniq.c 2 Jul 2004 23:43:05 -0000 1.25
+++ uniq.c 22 Jan 2005 09:14:38 -0000
@@ -50,6 +50,7 @@
#include <ctype.h>
#include <err.h>
+#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
@@ -71,13 +72,15 @@
void obsolete(char *[]);
static void usage(void);
int wcsicoll(wchar_t *, wchar_t *);
+static wchar_t *__wcsalloc(wchar_t *);
int
main (int argc, char *argv[])
{
wchar_t *t1, *t2;
FILE *ifp, *ofp;
- int ch;
+ int ch, b1;
+ size_t prevbuflen, thisbuflen;
wchar_t *prevline, *thisline;
char *p;
const char *ifn;
@@ -136,20 +139,24 @@
if (argc > 1)
ofp = file(argv[1], "w");
- prevline = malloc(MAXLINELEN * sizeof(*prevline));
- thisline = malloc(MAXLINELEN * sizeof(*thisline));
+ prevbuflen = MAXLINELEN * sizeof(*prevline);
+ thisbuflen = MAXLINELEN * sizeof(*thisline);
+ prevline = malloc(prevbuflen);
+ thisline = malloc(thisbuflen);
+
if (prevline == NULL || thisline == NULL)
err(1, "malloc");
- if (getline(prevline, MAXLINELEN, ifp) == NULL) {
+ if (getline(prevline, prevbuflen, ifp) == NULL) {
if (ferror(ifp))
err(1, "%s", ifp == stdin ? "stdin" : argv[0]);
exit(0);
}
+
if (!cflag && uflag && dflag)
show(ofp, prevline);
- while (getline(thisline, MAXLINELEN, ifp)) {
+ while (getline(thisline, thisbuflen, ifp)) {
/* If requested get the chosen fields + character
offsets. */
if (numfields || numchars) {
t1 = skip(thisline);
@@ -169,10 +176,13 @@
if (cflag || !dflag || !uflag)
show(ofp, prevline);
t1 = prevline;
+ b1 = prevbuflen;
prevline = thisline;
+ prevbuflen = thisbuflen;
if (!cflag && uflag && dflag)
show(ofp, prevline);
thisline = t1;
+ thisbuflen = b1;
repeats = 0;
} else
++repeats;
@@ -191,12 +201,15 @@
wint_t ch;
bufpos = 0;
- while (bufpos + 2 != buflen && (ch = getwc(fp)) !=
WEOF && ch != '\n')
+ while ((ch = getwc(fp)) != WEOF && ch != '\n') {
+ if ((bufpos + 1) == buflen) {
+ buflen = buflen + (1024 * sizeof(*buf));
+ buf = realloc(buf,buflen);
+ }
buf[bufpos++] = ch;
+ }
if (bufpos + 1 != buflen)
buf[bufpos] = '\0';
- while (ch != WEOF && ch != '\n')
- ch = getwc(fp);
return (bufpos != 0 || ch == '\n' ? buf : NULL);
}
@@ -278,16 +291,42 @@
exit(1);
}
+static wchar_t *
+__wcsalloc(wchar_t *ws)
+{
+ wchar_t *wcs;
+
+ if ((wcs = malloc(wcslen(ws) + 1)) == NULL)
+ return (NULL);
+
+ return (wcs);
+}
+
int
wcsicoll(wchar_t *s1, wchar_t *s2)
{
- wchar_t *p, line1[MAXLINELEN], line2[MAXLINELEN];
+ wchar_t *p, *l1, *l2;
+ int diff, sverrno;
+
+ if ((l1 = __wcsalloc(s1)) == NULL || (l2 =
__wcsalloc(s2)) == NULL) {
+ sverrno = errno;
+ free(l1);
+ errno = sverrno;
+ return(wcscmp(s1,s2));
+ }
- for (p = line1; *s1; s1++)
+ for (p = l1; *s1; s1++)
*p++ = towlower(*s1);
*p = '\0';
- for (p = line2; *s2; s2++)
+ for (p = l2; *s2; s2++)
*p++ = towlower(*s2);
*p = '\0';
- return (wcscoll(line1, line2));
+
+ diff = wcscoll(l1,l2);
+ sverrno = errno;
+ free(l1);
+ free(l2);
+ errno = sverrno;
+
+ return (diff);
}
__________________________________________________
Do You Yahoo!?
Tired of spam? Yahoo! Mail has the best spam protection around
http://mail.yahoo.com
>Release-Note:
>Audit-Trail:
>Unformatted:
X-send-pr-version: 3.113
X-GNATS-Notify:
(2048)
More information about the freebsd-bugs
mailing list