svn commit: r211429 - stable/8/usr.bin/uniq
Jaakko Heinonen
jh at FreeBSD.org
Tue Aug 17 15:07:23 UTC 2010
Author: jh
Date: Tue Aug 17 15:07:23 2010
New Revision: 211429
URL: http://svn.freebsd.org/changeset/base/211429
Log:
MFC r200633:
Sync getline() with comm(1):
- Prevent overflowing of the buffer length variable in getline() by
limiting its maximum value.
- Exit if reallocf(3) fails in getline(). Previously, the failure was
silently treated as end-of-file.
MFC r204803 by ache:
1) Rewrite input processing to not exit with error on the first EILSEQ found
in the input data but fall back to a "binary equal" check instead.
POSIX says: "The input file shall be a text file", nothing more,
so a text file containing an illegal byte sequence is valid input.
BTW, GNU sort does not fail on EILSEQ either.
2) Speed up input processing a bit in complex cases such as skipping fields
or chars, or ignoring case.
3) Enforce the implied LINE_MAX limit (from POSIX definition of "text file"
and POSIX uniq(1) description).
MFC r204811 by ache:
Remove vestiges of the old %-format which prevent building on amd64
MFC r204876 by ache:
1) Reimplement (differently) the unlimited line length support that was
restricted in the previous commit.
2) Honor a missing final \n on output: if the very last input line has no \n, do not add one.
MFC r204927 by ache:
Add SIZE_MAX overflow check
Modified:
stable/8/usr.bin/uniq/uniq.c
Directory Properties:
stable/8/usr.bin/uniq/ (props changed)
Modified: stable/8/usr.bin/uniq/uniq.c
==============================================================================
--- stable/8/usr.bin/uniq/uniq.c Tue Aug 17 15:02:33 2010 (r211428)
+++ stable/8/usr.bin/uniq/uniq.c Tue Aug 17 15:07:23 2010 (r211429)
@@ -52,6 +52,8 @@ static const char rcsid[] =
#include <err.h>
#include <limits.h>
#include <locale.h>
+#include <stdint.h>
+#define _WITH_GETLINE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -59,30 +61,26 @@ static const char rcsid[] =
#include <wchar.h>
#include <wctype.h>
-#define MAXLINELEN (LINE_MAX + 1)
-
-int cflag, dflag, uflag;
+int cflag, dflag, uflag, iflag;
int numchars, numfields, repeats;
FILE *file(const char *, const char *);
-wchar_t *getline(wchar_t *, size_t *, FILE *);
-void show(FILE *, wchar_t *);
+wchar_t *convert(const char *);
+int inlcmp(const char *, const char *);
+void show(FILE *, const char *);
wchar_t *skip(wchar_t *);
void obsolete(char *[]);
static void usage(void);
-int wcsicoll(wchar_t *, wchar_t *);
int
main (int argc, char *argv[])
{
- wchar_t *t1, *t2;
+ wchar_t *tprev, *tthis;
FILE *ifp, *ofp;
- int ch, b1;
- size_t prevbuflen, thisbuflen;
- wchar_t *prevline, *thisline;
- char *p;
+ int ch, comp;
+ size_t prevbuflen, thisbuflen, b1;
+ char *prevline, *thisline, *p;
const char *ifn;
- int iflag = 0, comp;
(void) setlocale(LC_ALL, "");
@@ -137,48 +135,48 @@ main (int argc, char *argv[])
if (argc > 1)
ofp = file(argv[1], "w");
- prevbuflen = MAXLINELEN;
- thisbuflen = MAXLINELEN;
- prevline = malloc(prevbuflen * sizeof(*prevline));
- thisline = malloc(thisbuflen * sizeof(*thisline));
- if (prevline == NULL || thisline == NULL)
- err(1, "malloc");
+ prevbuflen = thisbuflen = 0;
+ prevline = thisline = NULL;
- if ((prevline = getline(prevline, &prevbuflen, ifp)) == NULL) {
+ if (getline(&prevline, &prevbuflen, ifp) < 0) {
if (ferror(ifp))
err(1, "%s", ifn);
exit(0);
}
+ tprev = convert(prevline);
+
if (!cflag && uflag && dflag)
show(ofp, prevline);
- while ((thisline = getline(thisline, &thisbuflen, ifp)) != NULL) {
- /* If requested get the chosen fields + character offsets. */
- if (numfields || numchars) {
- t1 = skip(thisline);
- t2 = skip(prevline);
- } else {
- t1 = thisline;
- t2 = prevline;
- }
-
- /* If different, print; set previous to new value. */
- if (iflag)
- comp = wcsicoll(t1, t2);
+ tthis = NULL;
+ while (getline(&thisline, &thisbuflen, ifp) >= 0) {
+ if (tthis != NULL)
+ free(tthis);
+ tthis = convert(thisline);
+
+ if (tthis == NULL && tprev == NULL)
+ comp = inlcmp(thisline, prevline);
+ else if (tthis == NULL || tprev == NULL)
+ comp = 1;
else
- comp = wcscoll(t1, t2);
+ comp = wcscoll(tthis, tprev);
if (comp) {
+ /* If different, print; set previous to new value. */
if (cflag || !dflag || !uflag)
show(ofp, prevline);
- t1 = prevline;
+ p = prevline;
b1 = prevbuflen;
prevline = thisline;
prevbuflen = thisbuflen;
+ if (tprev != NULL)
+ free(tprev);
+ tprev = tthis;
if (!cflag && uflag && dflag)
show(ofp, prevline);
- thisline = t1;
+ thisline = p;
thisbuflen = b1;
+ tthis = NULL;
repeats = 0;
} else
++repeats;
@@ -191,25 +189,55 @@ main (int argc, char *argv[])
}
wchar_t *
-getline(wchar_t *buf, size_t *buflen, FILE *fp)
+convert(const char *str)
{
- size_t bufpos;
- wint_t ch;
+ size_t n;
+ wchar_t *buf, *ret, *p;
- bufpos = 0;
- while ((ch = getwc(fp)) != WEOF && ch != '\n') {
- if (bufpos + 2 >= *buflen) {
- *buflen = *buflen * 2;
- buf = reallocf(buf, *buflen * sizeof(*buf));
- if (buf == NULL)
- return (NULL);
- }
- buf[bufpos++] = ch;
+ if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
+ return (NULL);
+ if (SIZE_MAX / sizeof(*buf) < n + 1)
+ errx(1, "conversion buffer length overflow");
+ if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
+ err(1, "malloc");
+ if (mbstowcs(buf, str, n + 1) != n)
+ errx(1, "internal mbstowcs() error");
+ /* The last line may not end with \n. */
+ if (n > 0 && buf[n - 1] == L'\n')
+ buf[n - 1] = L'\0';
+
+ /* If requested get the chosen fields + character offsets. */
+ if (numfields || numchars) {
+ if ((ret = wcsdup(skip(buf))) == NULL)
+ err(1, "wcsdup");
+ free(buf);
+ } else
+ ret = buf;
+
+ if (iflag) {
+ for (p = ret; *p != L'\0'; p++)
+ *p = towlower(*p);
}
- if (bufpos + 1 != *buflen)
- buf[bufpos] = '\0';
- return (bufpos != 0 || ch == '\n' ? buf : NULL);
+ return (ret);
+}
+
+int
+inlcmp(const char *s1, const char *s2)
+{
+ int c1, c2;
+
+ while (*s1 == *s2++)
+ if (*s1++ == '\0')
+ return (0);
+ c1 = (unsigned char)*s1;
+ c2 = (unsigned char)*(s2 - 1);
+ /* The last line may not end with \n. */
+ if (c1 == '\n')
+ c1 = '\0';
+ if (c2 == '\n')
+ c2 = '\0';
+ return (c1 - c2);
}
/*
@@ -218,13 +246,13 @@ getline(wchar_t *buf, size_t *buflen, FI
* of the line.
*/
void
-show(FILE *ofp, wchar_t *str)
+show(FILE *ofp, const char *str)
{
if (cflag)
- (void)fprintf(ofp, "%4d %ls\n", repeats + 1, str);
+ (void)fprintf(ofp, "%4d %s", repeats + 1, str);
if ((dflag && repeats) || (uflag && !repeats))
- (void)fprintf(ofp, "%ls\n", str);
+ (void)fprintf(ofp, "%s", str);
}
wchar_t *
@@ -232,13 +260,14 @@ skip(wchar_t *str)
{
int nchars, nfields;
- for (nfields = 0; *str != '\0' && nfields++ != numfields; ) {
+ for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) {
while (iswblank(*str))
str++;
- while (*str != '\0' && !iswblank(*str))
+ while (*str != L'\0' && !iswblank(*str))
str++;
}
- for (nchars = numchars; nchars-- && *str; ++str);
+ for (nchars = numchars; nchars-- && *str != L'\0'; ++str)
+ ;
return(str);
}
@@ -288,52 +317,3 @@ usage(void)
"usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n");
exit(1);
}
-
-static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0;
-static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL;
-
-int
-wcsicoll(wchar_t *s1, wchar_t *s2)
-{
- wchar_t *p;
- size_t l1, l2;
- size_t new_l1_buflen, new_l2_buflen;
-
- l1 = wcslen(s1) + 1;
- l2 = wcslen(s2) + 1;
- new_l1_buflen = wcsicoll_l1_buflen;
- new_l2_buflen = wcsicoll_l2_buflen;
- while (new_l1_buflen < l1) {
- if (new_l1_buflen == 0)
- new_l1_buflen = MAXLINELEN;
- else
- new_l1_buflen *= 2;
- }
- while (new_l2_buflen < l2) {
- if (new_l2_buflen == 0)
- new_l2_buflen = MAXLINELEN;
- else
- new_l2_buflen *= 2;
- }
- if (new_l1_buflen > wcsicoll_l1_buflen) {
- wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * sizeof(*wcsicoll_l1_buf));
- if (wcsicoll_l1_buf == NULL)
- err(1, "reallocf");
- wcsicoll_l1_buflen = new_l1_buflen;
- }
- if (new_l2_buflen > wcsicoll_l2_buflen) {
- wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf));
- if (wcsicoll_l2_buf == NULL)
- err(1, "reallocf");
- wcsicoll_l2_buflen = new_l2_buflen;
- }
-
- for (p = wcsicoll_l1_buf; *s1; s1++)
- *p++ = towlower(*s1);
- *p = '\0';
- for (p = wcsicoll_l2_buf; *s2; s2++)
- *p++ = towlower(*s2);
- *p = '\0';
-
- return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf));
-}
More information about the svn-src-stable-8
mailing list