svn commit: r302594 - head/usr.bin/tr
Andrey A. Chernov
ache at FreeBSD.org
Mon Jul 11 21:23:51 UTC 2016
Author: ache
Date: Mon Jul 11 21:23:50 2016
New Revision: 302594
URL: https://svnweb.freebsd.org/changeset/base/302594
Log:
1) Following r302512 (remove collation support for [a-z]-ranges in libc)
remove collation support for a-z ranges here too.
It was implemented for single byte locales only in any case.
2) Reduce [Cc]flag loop to WCHAR_MAX; WINT_MAX here includes WEOF, which is
not a character.
3) Optimize [Cc]flag case: don't repeatedly add the last character of
string2 to squeeze cset when string2 reaches its EOS state.
4) Reflect in the manpage that [=equiv=] is implemented for single-byte
locales only.
Modified:
head/usr.bin/tr/str.c
head/usr.bin/tr/tr.1
head/usr.bin/tr/tr.c
Modified: head/usr.bin/tr/str.c
==============================================================================
--- head/usr.bin/tr/str.c Mon Jul 11 20:15:46 2016 (r302593)
+++ head/usr.bin/tr/str.c Mon Jul 11 21:23:50 2016 (r302594)
@@ -53,7 +53,7 @@ static int backslash(STR *, int *);
static int bracket(STR *);
static void genclass(STR *);
static void genequiv(STR *);
-static int genrange(STR *, int);
+static int genrange(STR *);
static void genseq(STR *);
wint_t
@@ -93,7 +93,7 @@ next(STR *s)
}
/* We can start a range at any time. */
- if (s->str[0] == '-' && genrange(s, is_octal))
+ if (s->str[0] == '-' && genrange(s))
return (next(s));
return (1);
case RANGE:
@@ -237,18 +237,16 @@ genequiv(STR *s)
}
static int
-genrange(STR *s, int was_octal)
+genrange(STR *s)
{
- int stopval, octal;
+ int stopval;
char *savestart;
- int n, cnt, *p;
size_t clen;
wchar_t wc;
- octal = 0;
savestart = s->str;
if (*++s->str == '\\')
- stopval = backslash(s, &octal);
+ stopval = backslash(s, NULL);
else {
clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
if (clen == (size_t)-1 || clen == (size_t)-2)
@@ -256,37 +254,13 @@ genrange(STR *s, int was_octal)
stopval = wc;
s->str += clen;
}
- /*
- * XXX Characters are not ordered according to collating sequence in
- * multibyte locales.
- */
- if (octal || was_octal || MB_CUR_MAX > 1) {
- if (stopval < s->lastch) {
- s->str = savestart;
- return (0);
- }
- s->cnt = stopval - s->lastch + 1;
- s->state = RANGE;
- --s->lastch;
- return (1);
- }
- if (charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0) {
+ if (stopval < s->lastch) {
s->str = savestart;
return (0);
}
- if ((s->set = p = malloc((NCHARS_SB + 1) * sizeof(int))) == NULL)
- err(1, "genrange() malloc");
- for (cnt = 0; cnt < NCHARS_SB; cnt++)
- if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >= 0 &&
- charcoll((const void *)&cnt, (const void *)&stopval) <= 0)
- *p++ = cnt;
- *p = OOBCH;
- n = p - s->set;
-
- s->cnt = 0;
- s->state = SET;
- if (n > 1)
- mergesort(s->set, n, sizeof(*(s->set)), charcoll);
+ s->cnt = stopval - s->lastch + 1;
+ s->state = RANGE;
+ --s->lastch;
return (1);
}
Modified: head/usr.bin/tr/tr.1
==============================================================================
--- head/usr.bin/tr/tr.1 Mon Jul 11 20:15:46 2016 (r302593)
+++ head/usr.bin/tr/tr.1 Mon Jul 11 21:23:50 2016 (r302594)
@@ -164,14 +164,6 @@ as defined by the collation sequence.
If either or both of the range endpoints are octal sequences, it
represents the range of specific coded values between the
range endpoints, inclusive.
-.Pp
-.Bf Em
-See the
-.Sx COMPATIBILITY
-section below for an important note regarding
-differences in the way the current
-implementation interprets range expressions differently from
-previous implementations.
.Ef
.It [:class:]
Represents all characters belonging to the defined character class.
@@ -307,22 +299,16 @@ Remove diacritical marks from all accent
.Pp
.Dl "tr \*q[=e=]\*q \*qe\*q"
.Sh COMPATIBILITY
-Previous
.Fx
implementations of
.Nm
did not order characters in range expressions according to the current
-locale's collation order, making it possible to convert unaccented Latin
-characters (esp.\& as found in English text) from upper to lower case using
+locale's collation order, making it possible to convert accented Latin
+characters from upper to lower case using
the traditional
.Ux
idiom of
.Dq Li "tr A-Z a-z" .
-Since
-.Nm
-now obeys the locale's collation order, this idiom may not produce
-correct results when there is not a 1:1 mapping between lower and
-upper case, or when the order of characters within the two cases differs.
As noted in the
.Sx EXAMPLES
section above, the character class expressions
@@ -334,6 +320,9 @@ should be used instead of explicit chara
and
.Dq Li A-Z .
.Pp
+.Dq Li [=equiv=]
+expression is implemented for single byte locales only.
+.Pp
System V has historically implemented character ranges using the syntax
.Dq Li [c-c]
instead of the
Modified: head/usr.bin/tr/tr.c
==============================================================================
--- head/usr.bin/tr/tr.c Mon Jul 11 20:15:46 2016 (r302593)
+++ head/usr.bin/tr/tr.c Mon Jul 11 21:23:50 2016 (r302594)
@@ -68,10 +68,8 @@ static void usage(void);
int
main(int argc, char **argv)
{
- static int carray[NCHARS_SB];
struct cmap *map;
struct cset *delete, *squeeze;
- int n, *p;
int Cflag, cflag, dflag, sflag, isstring2;
wint_t ch, cnt, lastch;
@@ -254,7 +252,7 @@ main(int argc, char **argv)
(void)next(&s2);
}
endloop:
- if (cflag || (Cflag && MB_CUR_MAX > 1)) {
+ if (cflag || Cflag) {
/*
* This is somewhat tricky: since the character set is
* potentially huge, we need to avoid allocating a map
@@ -268,14 +266,15 @@ endloop:
*/
s2.str = argv[1];
s2.state = NORMAL;
- for (cnt = 0; cnt < WINT_MAX; cnt++) {
+ for (cnt = 0; cnt <= WCHAR_MAX; cnt++) {
if (Cflag && !iswrune(cnt))
continue;
if (cmap_lookup(map, cnt) == OOBCH) {
- if (next(&s2))
+ if (next(&s2)) {
cmap_add(map, cnt, s2.lastch);
- if (sflag)
- cset_add(squeeze, s2.lastch);
+ if (sflag)
+ cset_add(squeeze, s2.lastch);
+ }
} else
cmap_add(map, cnt, cnt);
if ((s2.state == EOS || s2.state == INFINITE) &&
@@ -283,30 +282,6 @@ endloop:
break;
}
cmap_default(map, s2.lastch);
- } else if (Cflag) {
- for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) {
- if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt))
- *p++ = cnt;
- else
- cmap_add(map, cnt, cnt);
- }
- n = p - carray;
- if (Cflag && n > 1)
- (void)mergesort(carray, n, sizeof(*carray), charcoll);
-
- s2.str = argv[1];
- s2.state = NORMAL;
- for (cnt = 0; cnt < n; cnt++) {
- (void)next(&s2);
- cmap_add(map, carray[cnt], s2.lastch);
- /*
- * Chars taken from s2 can be different this time
- * due to lack of complex upper/lower processing,
- * so fill string2 again to not miss some.
- */
- if (sflag)
- cset_add(squeeze, s2.lastch);
- }
}
cset_cache(squeeze);
@@ -351,16 +326,6 @@ setup(char *arg, STR *str, int cflag, in
return (cs);
}
-int
-charcoll(const void *a, const void *b)
-{
- static char sa[2], sb[2];
-
- sa[0] = *(const int *)a;
- sb[0] = *(const int *)b;
- return (strcoll(sa, sb));
-}
-
static void
usage(void)
{
More information about the svn-src-all
mailing list