git: 5df6aca10906 - main - ed: add unicode support for the l (list) command
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sun, 01 Mar 2026 11:29:00 UTC
The branch main has been updated by bapt:
URL: https://cgit.FreeBSD.org/src/commit/?id=5df6aca10906f669bc7095ff735eba6b8fe95ff0
commit 5df6aca10906f669bc7095ff735eba6b8fe95ff0
Author: Baptiste Daroussin <bapt@FreeBSD.org>
AuthorDate: 2026-02-17 15:25:46 +0000
Commit: Baptiste Daroussin <bapt@FreeBSD.org>
CommitDate: 2026-03-01 11:28:29 +0000
ed: add unicode support for the l (list) command
Use mbrtowc()/iswprint()/wcwidth() in put_tty_line() so that
the l command displays valid multibyte characters as-is instead
of escaping each byte as octal.
Column wrapping now correctly accounts for character display
width (including double-width CJK characters).
Invalid or incomplete multibyte sequences (e.g. malformed UTF-8) and
non-printable characters are still escaped, byte by byte, using the
existing backslash escapes or three-digit octal.
Differential Revision: https://reviews.freebsd.org/D55365
---
bin/ed/ed.h | 2 ++
bin/ed/io.c | 70 +++++++++++++++++++++++++++++++++++++++++++++----------------
2 files changed, 54 insertions(+), 18 deletions(-)
diff --git a/bin/ed/ed.h b/bin/ed/ed.h
index e1e41cf3fe40..fdfaf47acb4f 100644
--- a/bin/ed/ed.h
+++ b/bin/ed/ed.h
@@ -36,6 +36,8 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
#define ERR (-2)
#define EMOD (-3)
diff --git a/bin/ed/io.c b/bin/ed/io.c
index b3262ea9e217..3a1b6d8bb443 100644
--- a/bin/ed/io.c
+++ b/bin/ed/io.c
@@ -298,13 +298,49 @@ put_tty_line(const char *s, int l, long n, int gflag)
int col = 0;
int lc = 0;
char *cp;
+ wchar_t wc;
+ mbstate_t mbs;
+ size_t clen;
+ int w;
if (gflag & GNP) {
printf("%ld\t", n);
col = 8;
}
- for (; l--; s++) {
- if ((gflag & GLS) && ++col > cols) {
+ for (; l > 0;) {
+ if (!(gflag & GLS)) {
+ putchar(*s++);
+ l--;
+ continue;
+ }
+ /* GLS mode: try to decode a multibyte character */
+ memset(&mbs, 0, sizeof(mbs));
+ clen = mbrtowc(&wc, s, l, &mbs);
+ if (clen != (size_t)-1 && clen != (size_t)-2 &&
+ clen > 1 && iswprint(wc) && (w = wcwidth(wc)) >= 0) {
+ /* printable multibyte character */
+ if (col + w > cols) {
+ fputs("\\\n", stdout);
+ col = 0;
+#ifndef BACKWARDS
+ if (!scripted && !isglobal && ++lc > rows) {
+ lc = 0;
+ fputs("Press <RETURN> to continue... ",
+ stdout);
+ fflush(stdout);
+ if (get_tty_line() < 0)
+ return ERR;
+ }
+#endif
+ }
+ col += w;
+ fwrite(s, 1, clen, stdout);
+ s += clen;
+ l -= clen;
+ continue;
+ }
+ /* single byte: ASCII printable, escape sequence, or octal */
+ if (++col > cols) {
fputs("\\\n", stdout);
col = 1;
#ifndef BACKWARDS
@@ -317,24 +353,22 @@ put_tty_line(const char *s, int l, long n, int gflag)
}
#endif
}
- if (gflag & GLS) {
- if (31 < *s && *s < 127 && *s != '\\')
- putchar(*s);
+ if (31 < *s && *s < 127 && *s != '\\')
+ putchar(*s);
+ else {
+ putchar('\\');
+ col++;
+ if (*s && (cp = strchr(ESCAPES, *s)) != NULL)
+ putchar(ESCCHARS[cp - ESCAPES]);
else {
- putchar('\\');
- col++;
- if (*s && (cp = strchr(ESCAPES, *s)) != NULL)
- putchar(ESCCHARS[cp - ESCAPES]);
- else {
- putchar((((unsigned char) *s & 0300) >> 6) + '0');
- putchar((((unsigned char) *s & 070) >> 3) + '0');
- putchar(((unsigned char) *s & 07) + '0');
- col += 2;
- }
+ putchar((((unsigned char) *s & 0300) >> 6) + '0');
+ putchar((((unsigned char) *s & 070) >> 3) + '0');
+ putchar(((unsigned char) *s & 07) + '0');
+ col += 2;
}
-
- } else
- putchar(*s);
+ }
+ s++;
+ l--;
}
#ifndef BACKWARDS
if (gflag & GLS)