git: bfc3815de6de - main - mail/bsfilter: Update to 1.0.20

From: Daniel Engberg <diizzy_at_FreeBSD.org>
Date: Wed, 05 Mar 2025 23:13:28 UTC
The branch main has been updated by diizzy:

URL: https://cgit.FreeBSD.org/ports/commit/?id=bfc3815de6de982f6784016156a0de3f9ee68dcb

commit bfc3815de6de982f6784016156a0de3f9ee68dcb
Author:     Yoshihiro Takahashi <nyan@FreeBSD.org>
AuthorDate: 2025-03-05 20:42:50 +0000
Commit:     Daniel Engberg <diizzy@FreeBSD.org>
CommitDate: 2025-03-05 23:12:46 +0000

    mail/bsfilter: Update to 1.0.20
    
    - Remove the local patch: it was taken from the old OSDN repository,
      and the new version already includes all of its changes
    - Change MASTER_SITES to GitHub
    - Update WWW
    
    Changelog: https://github.com/nbkenichi/bsfilter/compare/v1.0.19...v1.0.20
---
 mail/bsfilter/Makefile                      |   17 +-
 mail/bsfilter/distinfo                      |    5 +-
 mail/bsfilter/files/patch-bsfilter_bsfilter | 5075 ---------------------------
 3 files changed, 13 insertions(+), 5084 deletions(-)

diff --git a/mail/bsfilter/Makefile b/mail/bsfilter/Makefile
index ea29b285cc29..9b874ffdb1c4 100644
--- a/mail/bsfilter/Makefile
+++ b/mail/bsfilter/Makefile
@@ -1,26 +1,29 @@
 PORTNAME=	bsfilter
-PORTVERSION=	1.0.19
-PORTREVISION=	4
+DISTVERSIONPREFIX=	v
+DISTVERSION=	1.0.20
 CATEGORIES=	mail ruby
-MASTER_SITES=	OSDN/bsfilter
 
 MAINTAINER=	nyan@FreeBSD.org
 COMMENT=	Bayesian spam filter written in Ruby
-WWW=		https://osdn.net/projects/bsfilter/
+WWW=		https://github.com/nbkenichi/bsfilter
 
 LICENSE=	GPLv2
 
 RUN_DEPENDS=	rubygem-gdbm>=2.0.0,2:databases/rubygem-gdbm \
 		rubygem-sdbm>=1.0.0:databases/rubygem-sdbm
 
-USES=		ruby shebangfix tar:tgz
-SHEBANG_FILES=	bsfilter/bsfilter
+USES=		ruby shebangfix
+SHEBANG_FILES=	src/bsfilter.rb
+
+USE_GITHUB=	yes
+GH_ACCOUNT=	nbkenichi
+
 NO_BUILD=	yes
 
 OPTIONS_DEFINE=	EXAMPLES
 
 do-install:
-	${INSTALL_SCRIPT} ${WRKSRC}/bsfilter/${PORTNAME} ${STAGEDIR}${PREFIX}/bin/${PORTNAME}
+	${INSTALL_SCRIPT} ${WRKSRC}/src/bsfilter.rb ${STAGEDIR}${PREFIX}/bin/${PORTNAME}
 	@${MKDIR} ${STAGEDIR}${EXAMPLESDIR}
 .for FILE in bsfilter.conf.sample dot-qmail.sample
 	${INSTALL_DATA} ${FILESDIR}/${FILE} ${STAGEDIR}${EXAMPLESDIR}
diff --git a/mail/bsfilter/distinfo b/mail/bsfilter/distinfo
index 34c3fcd2b09a..8d4020051465 100644
--- a/mail/bsfilter/distinfo
+++ b/mail/bsfilter/distinfo
@@ -1,2 +1,3 @@
-SHA256 (bsfilter-1.0.19.tgz) = 8aa1d713cc848b20d678eb7a5f24bec1879860d023701644bfd426a587998ac9
-SIZE (bsfilter-1.0.19.tgz) = 78660
+TIMESTAMP = 1740387732
+SHA256 (nbkenichi-bsfilter-v1.0.20_GH0.tar.gz) = 10fb704f3528f3a81ed350c42d0980d9104ed11b366d85a5795fbbc6e04b91db
+SIZE (nbkenichi-bsfilter-v1.0.20_GH0.tar.gz) = 78005
diff --git a/mail/bsfilter/files/patch-bsfilter_bsfilter b/mail/bsfilter/files/patch-bsfilter_bsfilter
deleted file mode 100644
index ff5d3d2ca636..000000000000
--- a/mail/bsfilter/files/patch-bsfilter_bsfilter
+++ /dev/null
@@ -1,5075 +0,0 @@
---- bsfilter/bsfilter.orig	2013-11-03 10:22:15 UTC
-+++ bsfilter/bsfilter
-@@ -1,6 +1,6 @@
- #! /usr/bin/env ruby
--## -*-Ruby-*- $Id: bsfilter,v 1.87 2013/11/03 10:22:15 nabeken Exp $
--## Copyright (C) 2003, 2004, 2005, 2006 NABEYA Kenichi
-+## -*-Ruby-*- $Id: bsfilter,v 1.89 2023/12/26 05:52:39 nabeken Exp $
-+## Copyright (C) 2003-2023 NABEYA Kenichi
- ##
- ## This program is free software; you can redistribute it and/or modify
- ## it under the terms of the GNU General Public License as published by
-@@ -16,115 +16,112 @@
- ## along with this program; if not, write to the Free Software
- ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- 
-+require 'English'
- require 'getoptlong'
- require 'nkf'
- 
- class Bsfilter
-   def initialize
--    @threads = Array::new
-+    @threads = []
-     @token_dbs = nil
--    @options = Hash::new
--    @db_hash = Hash::new
-+    @options = {}
-+    @db_hash = {}
-     @jtokenizer = nil
-   end
-   attr_accessor :token_dbs
- 
--  Release = "$Name: release_1_0_19 $".split[1].sub(/\A[^\d]*/, '').gsub(/_/, '.')
--  Release.concat("-") if (Release == "")
--  Revision = "$Revision: 1.87 $".gsub(/[^\.\d]/, '')
--  Languages = ["C", "ja"]
--  Default_Language = "C"
-+  Release = '$Name:  $'.split[1].sub(/\A[^\d]*/, '').gsub(/_/, '.')
-+  Release.concat('-') if (Release == '')
-+  Revision = '$Revision: 1.89 $'.gsub(/[^.\d]/, '')
-+  Languages = %w[C ja].freeze
-+  Default_Language = 'C'.freeze
- 
--##  Options = Hash::new           # used like a global variable
--##  DB = Hash::new
--  
--  Default_header_prefix = "Spam"
--  Default_spam_subject_prefix = "[SPAM] "
--  Default_refer_header = 
--    ["Ufrom", "From", "To", "Cc", "Subject", "Reply-to", "Return-path", "Received",
--     "Content-Transfer-Encoding", "Content-Type", "charset", "Content-Disposition"].join(",")
--  
--  Default_jtokenizer = "bigram"
--  Default_mark_in_token = "|!*'"
--  Default_homedir = ".bsfilter"
--  Default_conf_file = "bsfilter.conf"
--  Default_pid_file = "bsfilter.pid"
--  
--  Default_method = "rf"           # Robinson Fisher
--  Default_db = "sdbm"
--  Default_max_mail = 10000
-+  ##  Options = Hash::new           # used like a global variable
-+  ##  DB = Hash::new
-+
-+  Default_header_prefix = 'Spam'.freeze
-+  Default_spam_subject_prefix = '[SPAM] '.freeze
-+  Default_refer_header =
-+    %w[Ufrom From To Cc Subject Reply-to Return-path Received
-+       Content-Transfer-Encoding Content-Type charset Content-Disposition].join(',')
-+
-+  Default_jtokenizer = 'bigram'.freeze
-+  Default_mark_in_token = "|!*'".freeze
-+  Default_homedir = '.bsfilter'.freeze
-+  Default_conf_file = 'bsfilter.conf'.freeze
-+  Default_pid_file = 'bsfilter.pid'.freeze
-+
-+  Default_method = 'rf'.freeze # Robinson Fisher
-+  Default_db = 'sdbm'.freeze
-+  Default_max_mail = 10_000
-   Default_min_mail = 8000
-   Default_max_line = 500
--  
--  Default_pop_proxy_if = "0.0.0.0"
--  Default_pop_port = "110"
--  Default_pop_proxy_port = "10110"
--  Default_pop_max_size = 50000
--  
--  Default_imap_port = "143"
--  Default_imap_auth = "auto"
--  Default_imap_auth_preference = ["cram-md5", "login", "loginc"]
- 
--  Default_icon_number = 32512
--  
--  Clean_ext = ".clean"
--  Spam_ext = ".spam"
--  Prob_ext = ".prob"
--  Lock_ext = ".lock"
--  
--  SDBM_ext = ".sdbm"
--  GDBM_ext = ".gdbm"
--  BDB1_ext = ".bdb1"
--  BDB_ext = ".bdb"
--  QDBM_ext = ".qdbm"
--  
-+  Default_pop_proxy_if = '0.0.0.0'.freeze
-+  Default_pop_port = '110'.freeze
-+  Default_pop_proxy_port = '10110'.freeze
-+  Default_pop_max_size = 50_000
-+
-+  Default_imap_port = '143'.freeze
-+  Default_imap_auth = 'auto'.freeze
-+  Default_imap_auth_preference = %w[cram-md5 login loginc].freeze
-+
-+  Default_icon_number = 32_512
-+
-+  Clean_ext = '.clean'.freeze
-+  Spam_ext = '.spam'.freeze
-+  Prob_ext = '.prob'.freeze
-+  Lock_ext = '.lock'.freeze
-+
-+  NDBM_ext = '.ndbm'.freeze
-+  SDBM_ext = '.sdbm'.freeze
-+  GDBM_ext = '.gdbm'.freeze
-+  BDB1_ext = '.bdb1'.freeze
-+  BDB_ext = '.bdb'.freeze
-+  QDBM_ext = '.qdbm'.freeze
-+
-   EXIT_NORMAL = 0
-   CODE_NORMAL = true
-   CODE_SPAM = true
-   CODE_CLEAN = false
--  
--  CODESET_EUCJP = "eucJP"
--  CODESET_LATIN = "ISO8859-1"
--  CODESET_GB18030 = "GB18030"
--  CODESET_UTF8 = "UTF-8"
--  PATTERN_UTF8 = '[\xe0-\xef][\x80-\xbf][\x80-\xbf][\xe0-\xef][\x80-\xbf][\x80-\xbf]'
--  RE_UTF8 = Regexp.new(PATTERN_UTF8, nil, 'n')
--  
--  ALL_TAGS = ["html", "head", "title", "meta", "body", "div", "spam",
--              "h1", "h2", "h3", "h4", "h5", "h6",
--              "em", "strong", "font", "basefont", "big", "small",
--              "b", "i", "s", "u", "tt", "sub", "sub",
--              "rb", "rp", "rt","ruby",
--              "blink", "marquee",
--              "dfn", "cite", "abbr", "acronym",
--              "blockquote", "q",
--              "br", "pre", "ins", "del", "center", "style", "hr",
--              "ul", "ol", "li", "dl", "dt", "dd",
--              "table", "caption", "thead", "tbody", "tfoot",
--              "colgroup", "col", "tr", "td", "th",
--              "a", "link", "base", "img", "address",
--              "form", "input", "select", "option", "textarea", "label",
--              "fieldset", "legend", "optgroup",
--              "frameset", "frame", "nofrmaes", "iframe"].join('|')
--  
--  SPACE_TAGS = "br|p|td|tr|table|ul|ol|dl|li|dt|dd"
--  
--  RE_ALL_TAGS = Regexp::compile('\A<(' + ALL_TAGS + ')\b', Regexp::IGNORECASE, 'n')
--  RE_SPACE_TAGS = Regexp::compile('\A<(' + SPACE_TAGS + ')\b', Regexp::IGNORECASE, 'n')
--  
--  SOCKET_TIMEOUT = 30             # for single socket operation
--  
-+
-+  LOG_CODESET = 'UTF-8'.freeze	# codeset for verbose and debug message. nil => no conversion
-+
-+  ALL_TAGS = %w[html head title meta body div spam
-+                h1 h2 h3 h4 h5 h6
-+                em strong font basefont big small
-+                b i s u tt sub sub
-+                rb rp rt ruby
-+                blink marquee
-+                dfn cite abbr acronym
-+                blockquote q
-+                br pre ins del center style hr
-+                ul ol li dl dt dd
-+                table caption thead tbody tfoot
-+                colgroup col tr td th
-+                a link base img address
-+                form input select option textarea label
-+                fieldset legend optgroup
-+                frameset frame nofrmaes iframe].join('|')
-+
-+  SPACE_TAGS = 'br|p|td|tr|table|ul|ol|dl|li|dt|dd'.freeze
-+
-+  RE_ALL_TAGS = Regexp.compile('\A<(' + ALL_TAGS + ')\b', Regexp::IGNORECASE)
-+  RE_SPACE_TAGS = Regexp.compile('\A<(' + SPACE_TAGS + ')\b', Regexp::IGNORECASE)
-+
-+  SOCKET_TIMEOUT = 30 # for single socket operation
-+
-   module Bsutil
-     def insert_header!(buf, header, content)
-       buf[0] =~ /([\r\n]*)\z/
--      eol = $1
--      
--      (0 ... buf.length).each do |i|
--        if ((i == 0) &&         # unix from line
-+      eol = ::Regexp.last_match(1)
-+
-+      (0...buf.length).each do |i|
-+        if (i.zero? && # unix from line
-             (buf[i] =~ /\A>?from\s+(\S+)/))
-           next
--        elsif (buf[i] =~/\A(.*?:)/)
--          h = $1
-+        elsif (buf[i] =~ /\A(.*?:)/)
-+          h = ::Regexp.last_match(1)
-           if (h == header)
-             buf[i] = "#{header} #{content}#{eol}"
-             return
-@@ -134,7 +131,7 @@ class Bsfilter
-         elsif (buf[i] =~ /\A[\r\n]*\z/) # separator between header and body
-           buf[i, 0] = "#{header} #{content}#{eol}"
-           return
--        else                    # not header. may be body without separator
-+        else # not header. may be body without separator
-           buf[i, 0] = "#{header} #{content}#{eol}"
-           return
-         end
-@@ -144,17 +141,17 @@ class Bsfilter
- 
-     def append_header!(buf, header, prefix)
-       buf[0] =~ /([\r\n]*)\z/
--      eol = $1
-+      eol = ::Regexp.last_match(1)
-       append_done = false
--      (0 ... buf.length).each do |i|
--        if (buf[i] =~/\A(.*?:)(\s*)(.*?)([\r\n]*)\z/)
--          h = $1
--          org_content = $3
-+      (0...buf.length).each do |i|
-+        if (buf[i] =~ /\A(.*?:)(\s*)(.*?)([\r\n]*)\z/)
-+          h = ::Regexp.last_match(1)
-+          org_content = ::Regexp.last_match(3)
-           if (h.downcase == header.downcase)
-             buf[i] = "#{header} #{prefix}#{org_content}#{eol}"
-             append_done = true
-           end
--        elsif ((! append_done) &&
-+        elsif (!append_done &&
-                (((buf[i] =~ /\A\S/) && (buf[i] !~ /\A\S+:/)) || # found body without separator
-                 (buf[i] =~ /\A[\r\n]*\z/))) # separator between header and body
-           buf[i, 0] = "#{header} #{prefix}#{eol}"
-@@ -166,104 +163,96 @@ class Bsfilter
-     end
- 
-     def x_spam_flag
--      return sprintf("X-%s-Flag:", @options["header-prefix"])
-+      return format('X-%s-Flag:', @options['header-prefix'])
-     end
--    
-+
-     def x_spam_probability
--      return sprintf("X-%s-Probability:", @options["header-prefix"])
-+      return format('X-%s-Probability:', @options['header-prefix'])
-     end
--    
-+
-     def x_spam_revision
--      return sprintf("X-%s-Revision:", @options["header-prefix"])
-+      return format('X-%s-Revision:', @options['header-prefix'])
-     end
--    
--    def insert_headers!(buf, spam_flag, probability=nil)
-+
-+    def insert_headers!(buf, spam_flag, probability = nil)
-       updated = false
--      if (@options["insert-revision"])
-+      if (@options['insert-revision'])
-         insert_header!(buf, x_spam_revision, "bsfilter release #{Release} revision #{Revision}")
-         updated = true
-       end
--      if (@options["insert-flag"])
-+      if (@options['insert-flag'])
-         updated = true
--        if (spam_flag)
--          insert_header!(buf, x_spam_flag, "Yes")
-+        if spam_flag
-+          insert_header!(buf, x_spam_flag, 'Yes')
-         else
--          insert_header!(buf, x_spam_flag, "No")
-+          insert_header!(buf, x_spam_flag, 'No')
-         end
-       end
--      if (@options["insert-probability"] && probability)
-+      if (@options['insert-probability'] && probability)
-         updated = true
--        insert_header!(buf, x_spam_probability, sprintf("%f", probability))
-+        insert_header!(buf, x_spam_probability, format('%f', probability))
-       end
--      if (@options["mark-spam-subject"])
-+      if (@options['mark-spam-subject'])
-         updated = true
--        if (spam_flag)
--          append_header!(buf, "Subject:", @options["spam-subject-prefix"])
--        end
-+        append_header!(buf, 'Subject:', @options['spam-subject-prefix']) if spam_flag
-       end
-       return updated
-     end
--  end                           # end of module
-+  end
- 
-   include Bsutil
- 
-   class DevNull
--    def sync=(*args)
--    end
--    def print(*args)
--    end
--    def printf(*args)
--    end
-+    def sync=(*args); end
-+
-+    def print(*args); end
-+
-+    def printf(*args); end
-   end
- 
-   class DBHash < Hash
--    def flatten(magic="###", head="", &block)
--      self.each do |k, v|
--        if (v.class == DBHash)
--          if (head == "")
-+    def flatten(magic = '###', head = '', &block)
-+      each do |k, v|
-+        if v.instance_of?(DBHash)
-+          if (head == '')
-             v.flatten(magic, k, &block)
-           else
-             v.flatten(magic, head + magic + k, &block)
-           end
-+        elsif (head == '')
-+          yield k, v
-         else
--          if (head == "")
--            yield k, v
--          else
--            yield head + magic + k, v
--          end
-+          yield head + magic + k, v
-         end
-       end
-     end
--    
-+
-     def add(hash)
-       hash.each do |k, v|
-         if (self[k])
--          if ((self[k].class == DBHash) &&
--              (v.class == DBHash))
-+          if (self[k].instance_of?(DBHash) &&
-+              v.instance_of?(DBHash))
-             self[k].add(v)
-           else
-             self[k] += v
-           end
-         else
--          self[k] = v             # should do deep copy ?
-+          self[k] = v # should do deep copy ?
-         end
-       end
-     end
-+
-     def sub(hash)
-       hash.each do |k, v|
-         if (self[k])
--          if ((self[k].class == DBHash) &&
--              (v.class == DBHash))
-+          if (self[k].instance_of?(DBHash) &&
-+              v.instance_of?(DBHash))
-             self[k].sub(v)
--            if (self[k].empty?)
--              self.delete(k)
--            end
-+            delete(k) if self[k].empty?
-+          elsif (self[k] > v)
-+            self[k] -= v
-           else
--            if (self[k] > v)
--              self[k] -= v
--            else
--              self.delete(k)
--            end
-+            delete(k)
-           end
-         end
-       end
-@@ -271,38 +260,38 @@ class Bsfilter
-   end
- 
-   def safe_require(file)
--    begin
--      require file
--      return true
--    rescue LoadError
--      return false
--    end
-+    require file
-+    return true
-+  rescue LoadError
-+    return false
-   end
- 
-   def latin2ascii(str)
-     str.force_encoding('ASCII-8BIT')
-     newstr = str.tr("\x92\x93\x94".force_encoding('ASCII-8BIT'), "'''")
--    newstr.tr!("\xc0-\xc5\xc8-\xcb\xcc-\xcf\xd2-\xd6\xd9-\xdc".force_encoding('ASCII-8BIT'), "AAAAAAEEEEIIIIOOOOOUUUU")
--    newstr.tr!("\xe0-\xe5\xe8-\xeb\xec-\xef\xf2-\xf6\xf9-\xfc".force_encoding('ASCII-8BIT'), "aaaaaaeeeeiiiiooooouuuu")
-+    newstr.tr!("\xc0-\xc5\xc8-\xcb\xcc-\xcf\xd2-\xd6\xd9-\xdc".force_encoding('ASCII-8BIT'), 'AAAAAAEEEEIIIIOOOOOUUUU')
-+    newstr.tr!("\xe0-\xe5\xe8-\xeb\xec-\xef\xf2-\xf6\xf9-\xfc".force_encoding('ASCII-8BIT'), 'aaaaaaeeeeiiiiooooouuuu')
-     return newstr
-   end
- 
-   def u2eucjp(str)
--      return NKF::nkf('-e -E -X -Z0', str.encode('EUC-JP', 'UTF-8', :undef => :replace, :invalid => :replace))
-+    return NKF.nkf('-e -E -X -Z0', str.encode('EUC-JP', 'UTF-8', undef: :replace, invalid: :replace))
-   end
-+
-   def u2latin(str)
--    return str.encode('US-ASCII', 'UTF-8', :undef => :replace, :invalid => :replace)
-+    return str.encode('US-ASCII', 'UTF-8', undef: :replace, invalid: :replace)
-   end
-+
-   def gb180302eucjp(str)
--    return str.encode('EUC-JP', 'BIG5', :undef => :replace, :invalid => :replace)
-+    return str.encode('EUC-JP', 'BIG5', undef: :replace, invalid: :replace)
-   end
--  
-+
-   def open_ro(file)
--    if (file == "-")
--      fh = STDIN
-+    if (file == '-')
-+      fh = $stdin
-       yield fh
--    elsif (file.class == Array)
--      file.instance_eval <<EOM
-+    elsif file.instance_of?(Array)
-+      file.instance_eval <<EOM, __FILE__, __LINE__ + 1
-       @eof = false
-       def gets
-         @n = 0 if (! @n)
-@@ -323,66 +312,67 @@ class Bsfilter
- EOM
-       yield file
-     else
--      if (! FileTest::file?(file))
--        raise sprintf("%s is not file", file)
-+      if (! FileTest.file?(file))
-+        raise format('%s is not file', file)
-       end
--      fh = open(file, "rb")
-+
-+      fh = File.open(file, 'rb')
-       yield fh
-       fh.close
-     end
-   end
--  
-+
-   def open_wo(file, &block)
--    if (file == "-")
--      fh = STDOUT
-+    if (file == '-')
-+      fh = $stdout
-     else
--      fh = open(file, "wb")
-+      fh = open(file, 'wb')
-     end
-     if (block)
-       yield fh
--      if (file != "-")
-+      if (file != '-')
-         fh.close
-       end
-     else
-       return fh
-     end
-   end
--  
-+
-   class FLOAT
--    def initialize(f=0, power=1)
-+    def initialize(f = 0, power = 1)
-       @mant = 0
-       @exp = 0
-       set_f(f, power)
-     end
-     attr_accessor :mant, :exp
--    
-+
-     def to_f
--      return @mant * Math::exp(@exp)
-+      return @mant * Math.exp(@exp)
-     end
--    
-+
-     def ln
--      return Math::log(@mant) + @exp
-+      return Math.log(@mant) + @exp
-     end
--    
--    def * (a)
--      if (a.class == FLOAT)
--        n = FLOAT::new
-+
-+    def *(a)
-+      n = FLOAT.new
-+      if a.instance_of?(FLOAT)
-         n.mant = @mant * a.mant
-         n.exp = @exp + a.exp
-       else
--        n = FLOAT::new
-         n.exp = @exp
-         n.mant = @mant * a
-       end
-       return n
-     end
--    def set_f (a, power=1)
--      if (a > 0)
-+
-+    def set_f(a, power = 1)
-+      if a.positive?
-         @mant = 1
--        @exp = Math::log(a) * power
--      elsif (a < 0)
-+        @exp = Math.log(a) * power
-+      elsif a.negative?
-         @mant = -1
--        @exp = Math::log(-a) * power
-+        @exp = Math.log(-a) * power
-       else
-         @mant = 0
-         @exp = 0
-@@ -390,24 +380,24 @@ EOM
-       self
-     end
-   end
--  
--  
-+
-   module TokenAccess
-     def check_size(max_size, min_size)
-       if ((@file_count <= max_size) || (max_size <= 0) || (min_size <= 0))
-         return false
-       end
-+
-       old_count = @file_count
--      if (@options["verbose"])
--        @options["message-fh"].printf("reduce token database %s from %d to %d\n", @filename, old_count, min_size)
-+      if (@options['verbose'])
-+        @options['message-fh'].printf("reduce token database %s from %d to %d\n", @filename, old_count, min_size)
-       end
--      
-+
-       key_cts.each do |(category, token)|
--        if (category != ".internal")
-+        if (category != '.internal')
-           v = value(category, token) || 0
-           sub_scalar(category, token, (v * (old_count - min_size).to_f / old_count.to_f).ceil)
--          if (@options["debug"] && ! value(category, token))
--            @options["message-fh"].printf("deleted %s %s\n", category, token)
-+          if (@options['debug'] && ! value(category, token))
-+            @options['message-fh'].printf("deleted %s %s\n", category, token.to_utf8)
-           end
-         end
-       end
-@@ -415,41 +405,47 @@ EOM
-       @dirty = true
-       return true
-     end
--    
-+
-     def value_with_degene(category, token)
--      if (value(category, token))
-+      if value(category, token)
-         return value(category, token)
--      elsif (! @options["degeneration"])           # no degeneration
-+      elsif (!@options['degeneration']) # no degeneration
-         return nil
-       else
--        if (v = value(category, token[0 .. -2])) # cut last char
--          return v 
-+        if (v = value(category, token[0..-2])) # cut last char
-+          return v
-         end
--        token = token.gsub(Regexp::compile("[#{@options['mark-in-token']}]"), '')
-+
-+        token = token.gsub(Regexp.compile("[#{@options['mark-in-token']}]"), '')
-         if (v = value(category, token))
--          return v 
-+          return v
-         end
-+
-         token = token.downcase
-         if (v = value(category, token))
--          return v 
-+          return v
-         end
-+
-         token = token.upcase
-         if (v = value(category, token))
--          return v 
-+          return v
-         end
-+
-         token = token.capitalize
-         if (v = value(category, token))
--          return v 
-+          return v
-         end
-+
-         return nil
-       end
-     end
-+
-     def set_scalar(category, token, val)
-       @dirty = true
-       @file_count += 1
-       set(category, token, val)
-     end
--    
-+
-     def add_scalar(category, token, val)
-       @dirty = true
-       @file_count += 1
-@@ -459,58 +455,58 @@ EOM
-         set(category, token, val)
-       end
-     end
--    
-+
-     def show_new_token(db)
-       db.each_ct do |category, token|
--        if (! value(category, token) || (value(category, token) == 0))
--          @options["message-fh"].printf("new %s %s\n", category, token)
-+        if (!value(category, token) || value(category, token).zero?)
-+          @options['message-fh'].printf("new %s %s\n", category, token.to_utf8)
-         end
-       end
-     end
--    
-+
-     def values
--      array = Array::new
-+      array = []
-       each_ct do |c, t|
-         array.push(value(c, t))
-       end
-       return array
-     end
--    
-+
-     def key_cts
--      array = Array::new
-+      array = []
-       each_ct do |c, t|
-         array.push([c, t])
-       end
-       return array
-     end
--    
-+
-     def export(fh)
-       each_ct do |category, token|
--        fh.printf("%s %s %s %g\n", @language, category, token, value(category, token)) if (value(category, token))
-+        fh.printf("%s %s %s %g\n", @language, category, token, value(category, token)) if value(category, token)
-       end
-     end
-   end
--  
-+
-   class TokenDB
-     include TokenAccess
--    
--    def initialize(language=nil)
--      @hash = DBHash::new
-+
-+    def initialize(language = nil)
-+      @hash = DBHash.new
-       @file_count = 0
-       @language = language
--      @message_id = "-"
-+      @message_id = '-'
-       @probability = nil
-       @spam_flag = nil
-       @dirty = false
-       @time = nil
--      @filename = "-"
-+      @filename = '-'
-     end
-     attr_accessor :hash, :file_count, :probability, :language, :spam_flag, :message_id, :time, :filename
--    
-+
-     def size
-       @hash.size
-     end
--    
-+
-     def each_ct
-       @hash.each_key do |category|
-         @hash[category].each_key do |token|
-@@ -518,9 +514,9 @@ EOM
-         end
-       end
-     end
--    
-+
-     def value(category, token)
--      if (! @hash[category])
-+      if (!@hash[category])
-         return nil
-       elsif (v = @hash[category][token])
-         return v
-@@ -528,14 +524,14 @@ EOM
-         return nil
-       end
-     end
--    
-+
-     def set(category, token, v)
-       @dirty = true
--      @hash[category] = DBHash::new if (! @hash[category])
-+      @hash[category] = DBHash.new if (! @hash[category])
-       @hash[category][token] = v
-     end
--    
--    def print_keys_to_str(hash, separator, fh=STDOUT)
-+
-+    def print_keys_to_str(hash, separator, fh = $stdout)
-       hash.keys.sort.each do |k|
-         v = hash[k]
-         v = v.to_i
-@@ -543,57 +539,49 @@ EOM
-         fh.print(([k] * v).join(separator))
-       end
-     end
--    
-+
-     def clear
-       @dirty = true
-       @file_count = 0
--      @hash = DBHash::new
-+      @hash = DBHash.new
-     end
--    
-+
-     def add_db(db)
-       @dirty = true
-       @file_count += db.file_count
--      if (! @language && db.language)
--        @language = db.language
--      end
-+      @language = db.language if (!@language && db.language)
-       @hash.add(db.hash)
-     end
--    
-+
-     def add_hash(hash)
-       @dirty = true
-       @file_count += 1
-       @hash.add(hash)
-     end
--    
-+
-     def sub_scalar(category, token, val)
--      if (@file_count > 0)
--        @file_count -= 1
--      end
--      @hash.sub({category => {token => val}})
-+      @file_count -= 1 if @file_count.positive?
-+      @hash.sub({ category => { token => val } })
-     end
--    
-+
-     def sub_hash(hash)
-       @dirty = true
--      if (@file_count > 0)
--        @file_count -= 1
--      end
-+      @file_count -= 1 if @file_count.positive?
-       @hash.sub(hash)
-     end
--    
-+
-     def sub_db(db)
-       @dirty = true
-       @file_count -= db.file_count
--      if (@file_count < 1)
--        @file_count = 1
--      end
-+      @file_count = 1 if (@file_count < 1)
-       @hash.sub(db.hash)
-     end
-   end
--  
-+
-   class TokenDBM
-     include TokenAccess
--    MAGIC = "###"
--    def initialize(options, language, ext)
-+    MAGIC = '###'.freeze
-+    def initialize(options, language, _ext)
-       @options = options
-       @dbm = nil                  # SDBM not Hash
-       @dirty = nil                # not used. for TokenAccess
-@@ -602,13 +590,13 @@ EOM
-       @language = language
-     end
-     attr_accessor :file_count
--    
-+
-     def size
-       @dbm.size
-     end
--    
-+
-     def to_db
--      token_db = TokenDB::new(@language)
-+      token_db = TokenDB.new(@language)
-       @dbm.each do |ct, v|
-         (category, token) = ct.split(Regexp.new(MAGIC), 2)
-         token_db.set(category, token, v)
-@@ -616,25 +604,25 @@ EOM
-       end
-       return token_db
-     end
--    
-+
-     def clear
-       @dbm.clear
-       @file_count = 0
--      set(".internal", "file_count", 0)
-+      set('.internal', 'file_count', 0)
-     end
--    
-+
-     def each_ct
-       @dbm.each_key do |ct|
-         (category, token) = ct.force_encoding('ASCII-8BIT').split(Regexp.new(MAGIC), 2)
-         yield(category, token) if (category && token)
-       end
-     end
--    
-+
-     def add_db(token_db)
-       add_hash(token_db.hash)
-       @file_count += + token_db.file_count
-     end
--    
-+
-     def add_hash(hash)
-       @dirty = true
-       hash.flatten(MAGIC) do |k, v|
-@@ -645,15 +633,16 @@ EOM
-         end
-       end
-     end
--    
-+
-     def sub_db(token_db)
-       sub_hash(token_db.hash)
-       if (@file_count > token_db.file_count)
-         @file_count -= token_db.file_count
-       else
--        @file_count= 0
-+        @file_count = 0
-       end
-     end
-+
-     def sub_hash(hash)
-       @dirty = true
-       hash.flatten(MAGIC) do |k, v|
-@@ -666,24 +655,27 @@ EOM
-         end
-       end
-     end
--    
-+
-     def value(category, token)
-       v = @dbm[category + MAGIC + token]
--      if (v)
--        return v.to_f
--      else
--        return nil
--      end
-+      return v.to_f if v
-+
-+      return nil
-     end
--    
-+
-     def set(category, token, v)
-       @dirty = true
--      @dbm[category + MAGIC + token] = v.to_s
-+      begin
-+        @dbm[category + MAGIC + token] = v.to_s
-+      rescue
-+        @options['message-fh'].puts($ERROR_INFO.inspect, category + MAGIC + token, v.to_s) if (@options['verbose'])
-+        @options['message-fh'].puts($ERROR_POSITION) if (@options['debug'])
-+      end
-     end
--    
-+
*** 4170 LINES SKIPPED ***