printable characters in LC_CTYPE

Lena at lena.kiev.ua Lena at lena.kiev.ua
Sat Mar 17 14:33:14 UTC 2018


I wrote:

> In FreeBSD 10 or less (like 8.4) the file
> /usr/src/share/mklocale/ru_RU.KOI8-R.src
> contains a list of printable characters:
> 
> PRINT           0x20 - 0x7e 0x80 - 0xff
> 
> mklocale(1) converted that source file to binary file
> /usr/share/locale/ru_RU.KOI8-R/LC_CTYPE
> Apparently, this file is used by iswprint(3)
> 
> In FreeBSD 11.1 `man mklocale` says
> "mklocale has been replaced by localedef(1) in FreeBSD 11.0",
> and list of printable characters for LC_CTYPE=ru_RU.KOI8-R is different,
> excludes characters in 0x80-0xBF range including 0x9A (non-breaking space).
> As a consequence, ports mail/mutt14 and editors/aee
> which use iswprint(3)
> show garbage (escape sequences) instead of non-breaking space
> and some other characters.
> I need to change the binary file /usr/share/locale/ru_RU.KOI8-R/LC_CTYPE
> but how?
> I installed FreeBSD 11.1 i386 from memstick.img, downloaded sources.
> What to feed to localedef(1), where are the source files?
> Are they /usr/src/tools/tools/locale/etc/common.UTF-8.src
> and /usr/src/tools/tools/locale/etc/final-maps/map.KOI8-R ?
> But they seem to not specify which characters are printable.

I found definitions in /usr/include/_ctype.h :

#define _CTYPE_A        0x00000100L            /* Alpha */
#define _CTYPE_C        0x00000200L            /* Control */
#define _CTYPE_D        0x00000400L            /* Digit */
#define _CTYPE_G        0x00000800L            /* Graph */
#define _CTYPE_L        0x00001000L            /* Lower */
#define _CTYPE_P        0x00002000L            /* Punct */
#define _CTYPE_S        0x00004000L            /* Space */
#define _CTYPE_U        0x00008000L            /* Upper */
#define _CTYPE_X        0x00010000L            /* X digit */
#define _CTYPE_B        0x00020000L            /* Blank */
#define _CTYPE_R        0x00040000L            /* Print */
#define _CTYPE_I        0x00080000L            /* Ideogram */
#define _CTYPE_T        0x00100000L            /* Special */
#define _CTYPE_Q        0x00200000L            /* Phonogram */
#define _CTYPE_N        0x00400000L            /* Number (superset of digit) */

and an array of 256 4-byte words in the binary file
/usr/share/locale/ru_RU.KOI8-R/LC_CTYPE
starting at offset 0x28, each word stored in little-endian
(least significant byte first) order.
The array corresponds to characters 0x00 - 0xFF, for example 4 bytes
00 02 00 00 (0x00000200) at offset 0x028 for char 0x00 (a control character),
00 40 06 00 (0x00064000) at offset 0x0A8 for char 0x20 (blank),
00 00 00 00 (0x00000000) at offset 0x290 for char 0x9A (non-breaking space).

Using `mcedit` from port misc/mc (F4 key in Midnight Commander),
I edited that binary file in hex mode: I changed each
"** ** *0 **"  to
"** ** *4 **"  at offsets 0x228 - 0x424 (128 times),
i.e. I set the _CTYPE_R (Print) bit 0x00040000 in each of those words,
in order to mark characters 0x80 - 0xFF (including non-breaking space 0x9A)
as printable.

I did the same with /usr/share/locale/uk_UA.KOI8-U/LC_CTYPE.

Also a patch for port mail/mutt14 :

--- pager.c.orig	2007-05-23 04:17:53.000000000 +0300
+++ pager.c	2018-03-17 11:08:28.792617000 +0200
@@ -1144,6 +1144,8 @@
       else
 	col = t;
     }
+    else if (wc == '\r'  /* ^M CR 0x0d */     && buf[ch+1] == '\n')
+      ;
     else if (wc < 0x20 || wc == 0x7f)
     {
       if (col + 2 > wrap_cols)
@@ -1154,11 +1156,11 @@
     }
     else if (wc < 0x100)
     {
-      if (col + 4 > wrap_cols)
+      if (col + 1 > wrap_cols)
 	break;
-      col += 4;
+      col += 1;
       if (pa)
-	printw ("\\%03o", wc);
+	addch (wc);
     }
     else
     {
--- protos.h.orig	2007-05-23 04:17:53.000000000 +0300
+++ protos.h	2018-03-17 10:55:09.328186000 +0200
@@ -343,14 +343,14 @@
 
 #ifdef LOCALES_HACK
 #define IsPrint(c) (isprint((unsigned char)(c)) || \
-	((unsigned char)(c) >= 0xa0))
-#define IsWPrint(wc) (iswprint(wc) || wc >= 0xa0)
+	((unsigned char)(c) >= 0x80))
+#define IsWPrint(wc) (iswprint(wc) || wc >= 0x80)
 #else
 #define IsPrint(c) (isprint((unsigned char)(c)) || \
 	(option (OPTLOCALES) ? 0 : \
-	((unsigned char)(c) >= 0xa0)))
+	((unsigned char)(c) >= 0x80)))
 #define IsWPrint(wc) (iswprint(wc) || \
-	(option (OPTLOCALES) ? 0 : (wc >= 0xa0)))
+	(option (OPTLOCALES) ? 0 : (wc >= 0x80)))
 #endif
 
 #define new_pattern() safe_calloc(1, sizeof (pattern_t))


More information about the freebsd-questions mailing list