svn commit: r361884 - in head/usr.bin/sed: . tests

Oliver Pinter oliver.pntr at gmail.com
Sun Jun 7 18:58:58 UTC 2020


On Sunday, June 7, 2020, Kyle Evans <kevans at freebsd.org> wrote:

> Author: kevans
> Date: Sun Jun  7 04:32:38 2020
> New Revision: 361884
> URL: https://svnweb.freebsd.org/changeset/base/361884
>
> Log:
>   sed: attempt to learn about hex escapes (e.g. \x27)
>
>   Somewhat predictably, software often wants to use \x27/\x24 among others
> so
>   that they can decline worrying about ugly escaping, if said escaping is
> even
>   possible. Right now, this software is using these and getting the wrong
>   results, as we'll interpret those as x27 and x24 respectively. Some
> examples
>   of this, when an exp-run was run, were science/octopus and misc/vifm.
>
>   Go ahead and process these at all times.  We allow either one or two
> digits,
>   and the tests account for both.  If extra digits are specified, e.g.
> \x2727,
>   then the third and fourth digits are interpreted literally as one might
>   expect.
>
>   PR:           229925
>   MFC after:    2 weeks


Could you please add an entry for this to the release notes? :)



>
> Modified:
>   head/usr.bin/sed/compile.c
>   head/usr.bin/sed/tests/sed2_test.sh
>
> Modified: head/usr.bin/sed/compile.c
> ============================================================
> ==================
> --- head/usr.bin/sed/compile.c  Sun Jun  7 03:11:34 2020        (r361883)
> +++ head/usr.bin/sed/compile.c  Sun Jun  7 04:32:38 2020        (r361884)
> @@ -49,6 +49,7 @@ static const char sccsid[] = "@(#)compile.c   8.1 (Berke
>  #include <fcntl.h>
>  #include <limits.h>
>  #include <regex.h>
> +#include <stdbool.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -365,6 +366,51 @@ nonsel:            /* Now parse the command */
>         }
>  }
>
> +static int
> +hex2char(const char *in, char *out, int len)
> +{
> +       long ord;
> +       char *endptr, hexbuf[3];
> +
> +       hexbuf[0] = in[0];
> +       hexbuf[1] = len > 1 ? in[1] : '\0';
> +       hexbuf[2] = '\0';
> +
> +       errno = 0;
> +       ord = strtol(hexbuf, &endptr, 16);
> +       if (*endptr != '\0' || errno != 0)
> +               return (ERANGE);
> +       *out = (char)ord;
> +       return (0);
> +}
> +
> +static bool
> +hexdigit(char c)
> +{
> +       int lc;
> +
> +       lc = tolower(c);
> +       return isdigit(lc) || (lc >= 'a' && lc <= 'f');
> +}
> +
> +static bool
> +dohex(const char *in, char *out, int *len)
> +{
> +       int tmplen;
> +
> +       if (!hexdigit(in[0]))
> +               return (false);
> +       tmplen = 1;
> +       if (hexdigit(in[1]))
> +               ++tmplen;
> +       if (hex2char(in, out, tmplen) == 0) {
> +               *len = tmplen;
> +               return (true);
> +       }
> +
> +       return (false);
> +}
> +
>  /*
>   * Get a delimited string.  P points to the delimiter of the string; d
> points
>   * to a buffer area.  Newline and delimiter escapes are processed; other
> @@ -377,6 +423,7 @@ nonsel:             /* Now parse the command */
>  static char *
>  compile_delimited(char *p, char *d, int is_tr)
>  {
> +       int hexlen;
>         char c;
>
>         c = *p++;
> @@ -412,6 +459,12 @@ compile_delimited(char *p, char *d, int is_tr)
>                         }
>                         p += 2;
>                         continue;
> +               } else if (*p == '\\' && p[1] == 'x') {
> +                       if (dohex(&p[2], d, &hexlen)) {
> +                               ++d;
> +                               p += hexlen + 2;
> +                               continue;
> +                       }
>                 } else if (*p == '\\' && p[1] == '\\') {
>                         if (is_tr)
>                                 p++;
> @@ -431,7 +484,7 @@ compile_delimited(char *p, char *d, int is_tr)
>  static char *
>  compile_ccl(char **sp, char *t)
>  {
> -       int c, d;
> +       int c, d, hexlen;
>         char *s = *sp;
>
>         *t++ = *s++;
> @@ -459,6 +512,10 @@ compile_ccl(char **sp, char *t)
>                                 *t = '\t';
>                                 s++;
>                                 break;
> +                       case 'x':
> +                               if (dohex(&s[2], t, &hexlen))
> +                                       s += hexlen + 1;
> +                               break;
>                         }
>                 }
>         }
> @@ -499,7 +556,7 @@ static char *
>  compile_subst(char *p, struct s_subst *s)
>  {
>         static char lbuf[_POSIX2_LINE_MAX + 1];
> -       int asize, size;
> +       int asize, hexlen, size;
>         u_char ref;
>         char c, *text, *op, *sp;
>         int more = 1, sawesc = 0;
> @@ -562,6 +619,21 @@ compile_subst(char *p, struct s_subst *s)
>                                                 break;
>                                         case 't':
>                                                 *p = '\t';
> +                                               break;
> +                                       case 'x':
> +#define        ADVANCE_N(s, n)                                 \
> +       do {                                            \
> +               char *adv = (s);                        \
> +               while (*(adv + (n) - 1) != '\0') {      \
> +                       *adv = *(adv + (n));            \
> +                       ++adv;                          \
> +               }                                       \
> +               *adv = '\0';                            \
> +       } while (0);
> +                                               if (dohex(&p[1], p,
> &hexlen)) {
> +                                                       ADVANCE_N(p + 1,
> +                                                           hexlen);
> +                                               }
>                                                 break;
>                                         }
>                                 }
>
> Modified: head/usr.bin/sed/tests/sed2_test.sh
> ============================================================
> ==================
> --- head/usr.bin/sed/tests/sed2_test.sh Sun Jun  7 03:11:34 2020
> (r361883)
> +++ head/usr.bin/sed/tests/sed2_test.sh Sun Jun  7 04:32:38 2020
> (r361884)
> @@ -88,10 +88,39 @@ escape_subst_body()
>         atf_check -o 'inline:abcx\n' sed 's/[ \r\t]//g' c
>  }
>
> +atf_test_case hex_subst
> +hex_subst_head()
> +{
> +       atf_set "descr" "Verify proper conversion of hex escapes"
> +}
> +hex_subst_body()
> +{
> +       printf "test='foo'" > a
> +       printf "test='27foo'" > b
> +       printf "\rn" > c
> +       printf "xx" > d
> +
> +       atf_check -o 'inline:test="foo"' sed 's/\x27/"/g' a
> +       atf_check -o "inline:'test'='foo'" sed 's/test/\x27test\x27/g' a
> +
> +       # Make sure we take trailing digits literally.
> +       atf_check -o "inline:test=\"foo'" sed 's/\x2727/"/g' b
> +
> +       # Single digit \x should work as well.
> +       atf_check -o "inline:xn" sed 's/\xd/x/' c
> +
> +       # Invalid digit should cause us to ignore the sequence.  This test
> +       # invokes UB, escapes of an ordinary character.  A future change
> will
> +       # make regex(3) no longer tolerate this and we'll need to adjust
> what
> +       # we're doing, but for now this will suffice.
> +       atf_check -o "inline:" sed 's/\xx//' d
> +}
> +
>  atf_init_test_cases()
>  {
>         atf_add_test_case inplace_command_q
>         atf_add_test_case inplace_hardlink_src
>         atf_add_test_case inplace_symlink_src
>         atf_add_test_case escape_subst
> +       atf_add_test_case hex_subst
>  }
> _______________________________________________
> svn-src-head at freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscribe at freebsd.org"
>


More information about the svn-src-head mailing list