git: 9f6a619a7d7a - main - mandoc: workaround lack of macro parsing in list -width

From: Eric van Gyzen <vangyzen_at_FreeBSD.org>
Date: Tue, 31 May 2022 15:08:49 UTC
The branch main has been updated by vangyzen:

URL: https://cgit.FreeBSD.org/src/commit/?id=9f6a619a7d7ae0764374ff9949c8bfad36f4096b

commit 9f6a619a7d7ae0764374ff9949c8bfad36f4096b
Author:     Eric van Gyzen <vangyzen@FreeBSD.org>
AuthorDate: 2022-05-17 17:46:59 +0000
Commit:     Eric van Gyzen <vangyzen@FreeBSD.org>
CommitDate: 2022-05-31 15:06:46 +0000

    mandoc: workaround lack of macro parsing in list -width
    
    GNU tools parse macros in the -width argument of lists.  mandoc does not,
    so it calculates an excessive width.  This often squeezes the text into
    a very narrow column, especially in nested lists.
    
    Implement the easy workaround suggested in the TODO list.  When there is
    only one macro, at the beginning of the -width argument, this fixes the
    formatting as well as a complete solution would.
    
    Reviewed by:    bapt
    Relnotes:       yes
    MFC after:      1 week
    Sponsored by:   Dell EMC Isilon
    Differential Revision:  https://reviews.freebsd.org/D35245
---
 contrib/mandoc/TODO            |  4 +---
 contrib/mandoc/mdoc_validate.c | 52 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/contrib/mandoc/TODO b/contrib/mandoc/TODO
index fe2059c9e5d5..4135a3eca6c8 100644
--- a/contrib/mandoc/TODO
+++ b/contrib/mandoc/TODO
@@ -563,9 +563,7 @@ are mere guesses, and some may be wrong.
   reported again by Franco Fichtner Fri, 27 Sep 2013 21:02:28 +0200
   reported again by Bruce Evans Fri, 17 Feb 2017 21:22:44 +0100 via bapt@
   loc ***  exist ***  algo ***  size **  imp ***
-  An easy partial fix would be to just skip the first word if it starts
-  with a dot, including any following white space, when measuring.
-  loc *  exist *  algo *  size *  imp ***
+  An easy partial fix has been implemented as skip_leading_dot_word().
 
 - The \& zero-width character counts as output.
   That is, when it is alone on a line between two .Pp,
diff --git a/contrib/mandoc/mdoc_validate.c b/contrib/mandoc/mdoc_validate.c
index e1cd3ae1edcb..5c40842b1ae1 100644
--- a/contrib/mandoc/mdoc_validate.c
+++ b/contrib/mandoc/mdoc_validate.c
@@ -1818,6 +1818,45 @@ post_bl_block(POST_ARGS)
 	}
 }
 
+/*
+ * If "in" begins with a dot, a word, and whitespace, return a malloc'ed
+ * copy of the rest of "in", which the caller must free(3).  Otherwise
+ * (including when strdup(3) fails), return NULL and skip nothing.
+ *
+ * This is a partial workaround for the TODO list item beginning with:
+ * - When the -width string contains macros, the macros must be rendered
+ */
+static char *
+skip_leading_dot_word(const char *in)
+{
+	const char *iter = in;
+	const char *space;
+
+	if (*iter != '.')
+		return NULL;
+	iter++;
+
+	while (*iter != '\0' && !isspace((unsigned char)*iter))
+		iter++;
+	/*
+	 * The casts keep isspace(3) defined for bytes >= 0x80.
+	 * A dot followed by space or NUL is not a macro: skip nothing.
+	 */
+	if (iter == in + 1)
+		return NULL;
+
+	space = iter;
+	while (isspace((unsigned char)*iter))
+		iter++;
+	/*
+	 * If the word was not followed by space,
+	 * do not skip anything.
+	 */
+	if (iter == space)
+		return NULL;
+
+	return strdup(iter);
+}
+
 /*
  * If the argument of -offset or -width is a macro,
  * replace it with the associated default width.
@@ -1827,18 +1866,23 @@ rewrite_macro2len(struct roff_man *mdoc, char **arg)
 {
 	size_t		  width;
 	enum roff_tok	  tok;
+	char		 *newarg;
 
+	newarg = NULL;
 	if (*arg == NULL)
 		return;
 	else if ( ! strcmp(*arg, "Ds"))
 		width = 6;
-	else if ((tok = roffhash_find(mdoc->mdocmac, *arg, 0)) == TOKEN_NONE)
-		return;
-	else
+	else if ((tok = roffhash_find(mdoc->mdocmac, *arg, 0)) != TOKEN_NONE)
 		width = macro2len(tok);
+	else if ((newarg = skip_leading_dot_word(*arg)) == NULL)
+		return;
 
 	free(*arg);
-	mandoc_asprintf(arg, "%zun", width);
+	if (newarg != NULL)
+		*arg = newarg;
+	else
+		mandoc_asprintf(arg, "%zun", width);
 }
 
 static void