git: e4d3f14695 - main - improve URL matching

From: Wolfram Schneider <wosch_at_FreeBSD.org>
Date: Sun, 28 Apr 2024 18:21:40 UTC
The branch main has been updated by wosch:

URL: https://cgit.FreeBSD.org/doc/commit/?id=e4d3f14695b1e2a23143ca940e3b73df224dcb43

commit e4d3f14695b1e2a23143ca940e3b73df224dcb43
Author:     Wolfram Schneider <wosch@FreeBSD.org>
AuthorDate: 2024-04-28 18:19:51 +0000
Commit:     Wolfram Schneider <wosch@FreeBSD.org>
CommitDate: 2024-04-28 18:19:51 +0000

    improve URL matching
    
    for trailing '>', comma and other characters
    
    PR: 266336
---
 website/content/en/cgi/man.cgi | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/website/content/en/cgi/man.cgi b/website/content/en/cgi/man.cgi
index 8ddfa7f27f..d82411491b 100755
--- a/website/content/en/cgi/man.cgi
+++ b/website/content/en/cgi/man.cgi
@@ -1579,6 +1579,18 @@ sub manpath_without_ports {
     return join(":", @list);
 }
 
+# url_strip($url): split trailing dots/commas off a URL found in running text.
+# Returns (href, link text, stripped suffix), matching the caller's sprintf.
+sub url_strip {
+    my $url = shift;
+    # Lazy (.+?) gives the whole trailing run ("...", ".,") to capture 2, so
+    # sentence punctuation never ends up inside the link.
+    if ($url =~ m/^(.+?)([,.]+)$/) {
+        return ($1, $1, $2);
+    }
+    return ($url, $url, "");
+}
+
 sub man {
     local ( $name, $section, $arch ) = @_;
     local ( $_, $title, $head, *MAN );
@@ -1842,8 +1854,8 @@ s/([a-z0-9_\-\.]+\@[a-z0-9\-\.]+\.[a-z]+)/<a href="mailto:$1">$1<\/A>/gi;
         }
 
         # detect URLs in manpages
-        if (m,\b(ftp|http|https)://,) {
-            s,((ftp|http|https)://[^\s<>\)]+),<a href="$1">$1</a>,gi;
+        if (m,\b(http|https)://,) {
+            s|(https?://[^\s\)&<>'`";\]\[]+)|sprintf("<a href=\"%s\">%s</a>%s", &url_strip($1))|egi;
         }
 
         if (s%^(<b>.*?</b>)+\n?$% ($str = $1) =~ s,(<b>|</b>),,g; $str%ge) {