bin/78562: Add numerical sorting option to join(1)
Dmitrij Tejblum
tejblum at yandex-team.ru
Mon Mar 7 21:20:03 GMT 2005
>Number: 78562
>Category: bin
>Synopsis: Add numerical sorting option to join(1)
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: change-request
>Submitter-Id: current-users
>Arrival-Date: Mon Mar 07 21:20:02 GMT 2005
>Closed-Date:
>Last-Modified:
>Originator: Dmitrij Tejblum
>Release: FreeBSD 5.4-PRERELEASE i386
>Organization:
>Environment:
>Description:
join(1) requires its input files to be lexicographically sorted. Thus, if someone
has numerically sorted files, they must re-sort them in lexicographic
order, which is inconvenient.
Linux (GNU) join(1) has an -n option to handle numerically sorted files.
>How-To-Repeat:
>Fix:
--- join.c Mon Mar 7 23:14:49 2005
+++ join.c Mon Mar 7 23:52:19 2005
@@ -105,7 +105,8 @@
static wchar_t default_tabchar[] = L" \t";
wchar_t *tabchar = default_tabchar;/* delimiter characters (-t) */
-int cmp(LINE *, u_long, LINE *, u_long);
+int cmp(LINE *, u_long, LINE *, u_long, int);
+int cmpnum(long long, long long);
void fieldarg(char *);
void joinlines(INPUT *, INPUT *);
int mbscoll(const char *, const char *);
@@ -114,7 +115,7 @@
void outfield(LINE *, u_long, int);
void outoneline(INPUT *, LINE *);
void outtwoline(INPUT *, LINE *, INPUT *, LINE *);
-void slurp(INPUT *);
+void slurp(INPUT *, int);
wchar_t *towcs(const char *);
void usage(void);
@@ -122,7 +123,7 @@
main(int argc, char *argv[])
{
INPUT *F1, *F2;
- int aflag, ch, cval, vflag;
+ int aflag, ch, cval, nflag, vflag;
char *end;
setlocale(LC_ALL, "");
@@ -130,9 +131,9 @@
F1 = &input1;
F2 = &input2;
- aflag = vflag = 0;
+ aflag = nflag = vflag = 0;
obsolete(argv);
- while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) {
+ while ((ch = getopt(argc, argv, "\01na:e:j:1:2:o:t:v:")) != -1) {
switch (ch) {
case '\01': /* See comment in obsolete(). */
aflag = 1;
@@ -180,6 +181,9 @@
--F1->joinf;
--F2->joinf;
break;
+ case 'n':
+ nflag = 1;
+ break;
case 'o':
fieldarg(optarg);
break;
@@ -234,26 +238,26 @@
if (F1->fp == stdin && F2->fp == stdin)
errx(1, "only one input file may be stdin");
- slurp(F1);
- slurp(F2);
+ slurp(F1, nflag);
+ slurp(F2, nflag);
while (F1->setcnt && F2->setcnt) {
- cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf);
+ cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf, nflag);
if (cval == 0) {
/* Oh joy, oh rapture, oh beauty divine! */
if (joinout)
joinlines(F1, F2);
- slurp(F1);
- slurp(F2);
+ slurp(F1, nflag);
+ slurp(F2, nflag);
} else if (cval < 0) {
/* File 1 takes the lead... */
if (F1->unpair)
joinlines(F1, NULL);
- slurp(F1);
+ slurp(F1, nflag);
} else {
/* File 2 takes the lead... */
if (F2->unpair)
joinlines(F2, NULL);
- slurp(F2);
+ slurp(F2, nflag);
}
}
@@ -264,18 +268,18 @@
if (F1->unpair)
while (F1->setcnt) {
joinlines(F1, NULL);
- slurp(F1);
+ slurp(F1, nflag);
}
if (F2->unpair)
while (F2->setcnt) {
joinlines(F2, NULL);
- slurp(F2);
+ slurp(F2, nflag);
}
exit(0);
}
void
-slurp(INPUT *F)
+slurp(INPUT *F, int nflag)
{
LINE *lp, *lastlp, tmp;
size_t len;
@@ -355,7 +359,7 @@
}
/* See if the join field value has changed. */
- if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf)) {
+ if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf, nflag)) {
F->pushbool = 1;
F->pushback = F->setcnt;
break;
@@ -393,13 +397,25 @@
}
int
-cmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2)
+cmpnum(long long a, long long b)
+{
+ if (a < b)
+ return (-1);
+ else if (a == b)
+ return 0;
+ else
+ return 1;
+}
+
+int
+cmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2, int nflag)
{
if (lp1->fieldcnt <= fieldno1)
return (lp2->fieldcnt <= fieldno2 ? 0 : 1);
if (lp2->fieldcnt <= fieldno2)
return (-1);
- return (mbscoll(lp1->fields[fieldno1], lp2->fields[fieldno2]));
+ return (nflag ? cmpnum(atoll(lp1->fields[fieldno1]), atoll(lp2->fields[fieldno2])):
+ mbscoll(lp1->fields[fieldno1], lp2->fields[fieldno2]));
}
int
@@ -664,6 +680,6 @@
(void)fprintf(stderr, "%s %s\n%s\n",
"usage: join [-a fileno | -v fileno ] [-e string] [-1 field]",
"[-2 field]",
- " [-o list] [-t char] file1 file2");
+ " [-o list] [-n] [-t char] file1 file2");
exit(1);
}
--- join.1 Mon Mar 7 23:39:17 2005
+++ join.1 Tue Mar 8 00:00:27 2005
@@ -50,6 +50,7 @@
.Op Fl o Ar list
.Bk -words
.Ek
+.Op Fl n
.Op Fl t Ar char
.Op Fl \&1 Ar field
.Op Fl \&2 Ar field
@@ -93,6 +94,8 @@
.It Fl e Ar string
Replace empty output fields with
.Ar string .
+.It Fl n
+Assume numerically sorted input files.
.It Fl o Ar list
The
.Fl o
@@ -158,6 +161,13 @@
without the
.Fl b
option.
+When the option
+.Fl n
+is used, the files to be joined should be ordered as with
+.Xr sort 1
+with the
+.Fl n
+option.
.Pp
If one of the arguments
.Ar file1
@@ -211,6 +221,11 @@
.Nm
command conforms to
.St -p1003.1-2001 .
+The
+.Fl n
+option is a non-standard
+.Fx
+extension.
.Sh SEE ALSO
.Xr awk 1 ,
.Xr comm 1 ,
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-bugs
mailing list