git: bfc3815de6de - main - mail/bsfilter: Update to 1.0.20
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 05 Mar 2025 23:13:28 UTC
The branch main has been updated by diizzy:
URL: https://cgit.FreeBSD.org/ports/commit/?id=bfc3815de6de982f6784016156a0de3f9ee68dcb
commit bfc3815de6de982f6784016156a0de3f9ee68dcb
Author: Yoshihiro Takahashi <nyan@FreeBSD.org>
AuthorDate: 2025-03-05 20:42:50 +0000
Commit: Daniel Engberg <diizzy@FreeBSD.org>
CommitDate: 2025-03-05 23:12:46 +0000
mail/bsfilter: Update to 1.0.20
- Remove the local patch, because it was obtained from the old OSDN
  repository and the new version includes all of its changes
- Change MASTER_SITES to GitHub
- Update WWW
Changelog: https://github.com/nbkenichi/bsfilter/compare/v1.0.19...v1.0.20
---
mail/bsfilter/Makefile | 17 +-
mail/bsfilter/distinfo | 5 +-
mail/bsfilter/files/patch-bsfilter_bsfilter | 5075 ---------------------------
3 files changed, 13 insertions(+), 5084 deletions(-)
diff --git a/mail/bsfilter/Makefile b/mail/bsfilter/Makefile
index ea29b285cc29..9b874ffdb1c4 100644
--- a/mail/bsfilter/Makefile
+++ b/mail/bsfilter/Makefile
@@ -1,26 +1,29 @@
PORTNAME= bsfilter
-PORTVERSION= 1.0.19
-PORTREVISION= 4
+DISTVERSIONPREFIX= v
+DISTVERSION= 1.0.20
CATEGORIES= mail ruby
-MASTER_SITES= OSDN/bsfilter
MAINTAINER= nyan@FreeBSD.org
COMMENT= Bayesian spam filter written in Ruby
-WWW= https://osdn.net/projects/bsfilter/
+WWW= https://github.com/nbkenichi/bsfilter
LICENSE= GPLv2
RUN_DEPENDS= rubygem-gdbm>=2.0.0,2:databases/rubygem-gdbm \
rubygem-sdbm>=1.0.0:databases/rubygem-sdbm
-USES= ruby shebangfix tar:tgz
-SHEBANG_FILES= bsfilter/bsfilter
+USES= ruby shebangfix
+SHEBANG_FILES= src/bsfilter.rb
+
+USE_GITHUB= yes
+GH_ACCOUNT= nbkenichi
+
NO_BUILD= yes
OPTIONS_DEFINE= EXAMPLES
do-install:
- ${INSTALL_SCRIPT} ${WRKSRC}/bsfilter/${PORTNAME} ${STAGEDIR}${PREFIX}/bin/${PORTNAME}
+ ${INSTALL_SCRIPT} ${WRKSRC}/src/bsfilter.rb ${STAGEDIR}${PREFIX}/bin/${PORTNAME}
@${MKDIR} ${STAGEDIR}${EXAMPLESDIR}
.for FILE in bsfilter.conf.sample dot-qmail.sample
${INSTALL_DATA} ${FILESDIR}/${FILE} ${STAGEDIR}${EXAMPLESDIR}
diff --git a/mail/bsfilter/distinfo b/mail/bsfilter/distinfo
index 34c3fcd2b09a..8d4020051465 100644
--- a/mail/bsfilter/distinfo
+++ b/mail/bsfilter/distinfo
@@ -1,2 +1,3 @@
-SHA256 (bsfilter-1.0.19.tgz) = 8aa1d713cc848b20d678eb7a5f24bec1879860d023701644bfd426a587998ac9
-SIZE (bsfilter-1.0.19.tgz) = 78660
+TIMESTAMP = 1740387732
+SHA256 (nbkenichi-bsfilter-v1.0.20_GH0.tar.gz) = 10fb704f3528f3a81ed350c42d0980d9104ed11b366d85a5795fbbc6e04b91db
+SIZE (nbkenichi-bsfilter-v1.0.20_GH0.tar.gz) = 78005
diff --git a/mail/bsfilter/files/patch-bsfilter_bsfilter b/mail/bsfilter/files/patch-bsfilter_bsfilter
deleted file mode 100644
index ff5d3d2ca636..000000000000
--- a/mail/bsfilter/files/patch-bsfilter_bsfilter
+++ /dev/null
@@ -1,5075 +0,0 @@
---- bsfilter/bsfilter.orig 2013-11-03 10:22:15 UTC
-+++ bsfilter/bsfilter
-@@ -1,6 +1,6 @@
- #! /usr/bin/env ruby
--## -*-Ruby-*- $Id: bsfilter,v 1.87 2013/11/03 10:22:15 nabeken Exp $
--## Copyright (C) 2003, 2004, 2005, 2006 NABEYA Kenichi
-+## -*-Ruby-*- $Id: bsfilter,v 1.89 2023/12/26 05:52:39 nabeken Exp $
-+## Copyright (C) 2003-2023 NABEYA Kenichi
- ##
- ## This program is free software; you can redistribute it and/or modify
- ## it under the terms of the GNU General Public License as published by
-@@ -16,115 +16,112 @@
- ## along with this program; if not, write to the Free Software
- ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-+require 'English'
- require 'getoptlong'
- require 'nkf'
-
- class Bsfilter
- def initialize
-- @threads = Array::new
-+ @threads = []
- @token_dbs = nil
-- @options = Hash::new
-- @db_hash = Hash::new
-+ @options = {}
-+ @db_hash = {}
- @jtokenizer = nil
- end
- attr_accessor :token_dbs
-
-- Release = "$Name: release_1_0_19 $".split[1].sub(/\A[^\d]*/, '').gsub(/_/, '.')
-- Release.concat("-") if (Release == "")
-- Revision = "$Revision: 1.87 $".gsub(/[^\.\d]/, '')
-- Languages = ["C", "ja"]
-- Default_Language = "C"
-+ Release = '$Name: $'.split[1].sub(/\A[^\d]*/, '').gsub(/_/, '.')
-+ Release.concat('-') if (Release == '')
-+ Revision = '$Revision: 1.89 $'.gsub(/[^.\d]/, '')
-+ Languages = %w[C ja].freeze
-+ Default_Language = 'C'.freeze
-
--## Options = Hash::new # used like a global variable
--## DB = Hash::new
--
-- Default_header_prefix = "Spam"
-- Default_spam_subject_prefix = "[SPAM] "
-- Default_refer_header =
-- ["Ufrom", "From", "To", "Cc", "Subject", "Reply-to", "Return-path", "Received",
-- "Content-Transfer-Encoding", "Content-Type", "charset", "Content-Disposition"].join(",")
--
-- Default_jtokenizer = "bigram"
-- Default_mark_in_token = "|!*'"
-- Default_homedir = ".bsfilter"
-- Default_conf_file = "bsfilter.conf"
-- Default_pid_file = "bsfilter.pid"
--
-- Default_method = "rf" # Robinson Fisher
-- Default_db = "sdbm"
-- Default_max_mail = 10000
-+ ## Options = Hash::new # used like a global variable
-+ ## DB = Hash::new
-+
-+ Default_header_prefix = 'Spam'.freeze
-+ Default_spam_subject_prefix = '[SPAM] '.freeze
-+ Default_refer_header =
-+ %w[Ufrom From To Cc Subject Reply-to Return-path Received
-+ Content-Transfer-Encoding Content-Type charset Content-Disposition].join(',')
-+
-+ Default_jtokenizer = 'bigram'.freeze
-+ Default_mark_in_token = "|!*'".freeze
-+ Default_homedir = '.bsfilter'.freeze
-+ Default_conf_file = 'bsfilter.conf'.freeze
-+ Default_pid_file = 'bsfilter.pid'.freeze
-+
-+ Default_method = 'rf'.freeze # Robinson Fisher
-+ Default_db = 'sdbm'.freeze
-+ Default_max_mail = 10_000
- Default_min_mail = 8000
- Default_max_line = 500
--
-- Default_pop_proxy_if = "0.0.0.0"
-- Default_pop_port = "110"
-- Default_pop_proxy_port = "10110"
-- Default_pop_max_size = 50000
--
-- Default_imap_port = "143"
-- Default_imap_auth = "auto"
-- Default_imap_auth_preference = ["cram-md5", "login", "loginc"]
-
-- Default_icon_number = 32512
--
-- Clean_ext = ".clean"
-- Spam_ext = ".spam"
-- Prob_ext = ".prob"
-- Lock_ext = ".lock"
--
-- SDBM_ext = ".sdbm"
-- GDBM_ext = ".gdbm"
-- BDB1_ext = ".bdb1"
-- BDB_ext = ".bdb"
-- QDBM_ext = ".qdbm"
--
-+ Default_pop_proxy_if = '0.0.0.0'.freeze
-+ Default_pop_port = '110'.freeze
-+ Default_pop_proxy_port = '10110'.freeze
-+ Default_pop_max_size = 50_000
-+
-+ Default_imap_port = '143'.freeze
-+ Default_imap_auth = 'auto'.freeze
-+ Default_imap_auth_preference = %w[cram-md5 login loginc].freeze
-+
-+ Default_icon_number = 32_512
-+
-+ Clean_ext = '.clean'.freeze
-+ Spam_ext = '.spam'.freeze
-+ Prob_ext = '.prob'.freeze
-+ Lock_ext = '.lock'.freeze
-+
-+ NDBM_ext = '.ndbm'.freeze
-+ SDBM_ext = '.sdbm'.freeze
-+ GDBM_ext = '.gdbm'.freeze
-+ BDB1_ext = '.bdb1'.freeze
-+ BDB_ext = '.bdb'.freeze
-+ QDBM_ext = '.qdbm'.freeze
-+
- EXIT_NORMAL = 0
- CODE_NORMAL = true
- CODE_SPAM = true
- CODE_CLEAN = false
--
-- CODESET_EUCJP = "eucJP"
-- CODESET_LATIN = "ISO8859-1"
-- CODESET_GB18030 = "GB18030"
-- CODESET_UTF8 = "UTF-8"
-- PATTERN_UTF8 = '[\xe0-\xef][\x80-\xbf][\x80-\xbf][\xe0-\xef][\x80-\xbf][\x80-\xbf]'
-- RE_UTF8 = Regexp.new(PATTERN_UTF8, nil, 'n')
--
-- ALL_TAGS = ["html", "head", "title", "meta", "body", "div", "spam",
-- "h1", "h2", "h3", "h4", "h5", "h6",
-- "em", "strong", "font", "basefont", "big", "small",
-- "b", "i", "s", "u", "tt", "sub", "sub",
-- "rb", "rp", "rt","ruby",
-- "blink", "marquee",
-- "dfn", "cite", "abbr", "acronym",
-- "blockquote", "q",
-- "br", "pre", "ins", "del", "center", "style", "hr",
-- "ul", "ol", "li", "dl", "dt", "dd",
-- "table", "caption", "thead", "tbody", "tfoot",
-- "colgroup", "col", "tr", "td", "th",
-- "a", "link", "base", "img", "address",
-- "form", "input", "select", "option", "textarea", "label",
-- "fieldset", "legend", "optgroup",
-- "frameset", "frame", "nofrmaes", "iframe"].join('|')
--
-- SPACE_TAGS = "br|p|td|tr|table|ul|ol|dl|li|dt|dd"
--
-- RE_ALL_TAGS = Regexp::compile('\A<(' + ALL_TAGS + ')\b', Regexp::IGNORECASE, 'n')
-- RE_SPACE_TAGS = Regexp::compile('\A<(' + SPACE_TAGS + ')\b', Regexp::IGNORECASE, 'n')
--
-- SOCKET_TIMEOUT = 30 # for single socket operation
--
-+
-+ LOG_CODESET = 'UTF-8'.freeze # codeset for verbose and debug message. nil => no conversion
-+
-+ ALL_TAGS = %w[html head title meta body div spam
-+ h1 h2 h3 h4 h5 h6
-+ em strong font basefont big small
-+ b i s u tt sub sub
-+ rb rp rt ruby
-+ blink marquee
-+ dfn cite abbr acronym
-+ blockquote q
-+ br pre ins del center style hr
-+ ul ol li dl dt dd
-+ table caption thead tbody tfoot
-+ colgroup col tr td th
-+ a link base img address
-+ form input select option textarea label
-+ fieldset legend optgroup
-+ frameset frame nofrmaes iframe].join('|')
-+
-+ SPACE_TAGS = 'br|p|td|tr|table|ul|ol|dl|li|dt|dd'.freeze
-+
-+ RE_ALL_TAGS = Regexp.compile('\A<(' + ALL_TAGS + ')\b', Regexp::IGNORECASE)
-+ RE_SPACE_TAGS = Regexp.compile('\A<(' + SPACE_TAGS + ')\b', Regexp::IGNORECASE)
-+
-+ SOCKET_TIMEOUT = 30 # for single socket operation
-+
- module Bsutil
- def insert_header!(buf, header, content)
- buf[0] =~ /([\r\n]*)\z/
-- eol = $1
--
-- (0 ... buf.length).each do |i|
-- if ((i == 0) && # unix from line
-+ eol = ::Regexp.last_match(1)
-+
-+ (0...buf.length).each do |i|
-+ if (i.zero? && # unix from line
- (buf[i] =~ /\A>?from\s+(\S+)/))
- next
-- elsif (buf[i] =~/\A(.*?:)/)
-- h = $1
-+ elsif (buf[i] =~ /\A(.*?:)/)
-+ h = ::Regexp.last_match(1)
- if (h == header)
- buf[i] = "#{header} #{content}#{eol}"
- return
-@@ -134,7 +131,7 @@ class Bsfilter
- elsif (buf[i] =~ /\A[\r\n]*\z/) # separator between header and body
- buf[i, 0] = "#{header} #{content}#{eol}"
- return
-- else # not header. may be body without separator
-+ else # not header. may be body without separator
- buf[i, 0] = "#{header} #{content}#{eol}"
- return
- end
-@@ -144,17 +141,17 @@ class Bsfilter
-
- def append_header!(buf, header, prefix)
- buf[0] =~ /([\r\n]*)\z/
-- eol = $1
-+ eol = ::Regexp.last_match(1)
- append_done = false
-- (0 ... buf.length).each do |i|
-- if (buf[i] =~/\A(.*?:)(\s*)(.*?)([\r\n]*)\z/)
-- h = $1
-- org_content = $3
-+ (0...buf.length).each do |i|
-+ if (buf[i] =~ /\A(.*?:)(\s*)(.*?)([\r\n]*)\z/)
-+ h = ::Regexp.last_match(1)
-+ org_content = ::Regexp.last_match(3)
- if (h.downcase == header.downcase)
- buf[i] = "#{header} #{prefix}#{org_content}#{eol}"
- append_done = true
- end
-- elsif ((! append_done) &&
-+ elsif (!append_done &&
- (((buf[i] =~ /\A\S/) && (buf[i] !~ /\A\S+:/)) || # found body without separator
- (buf[i] =~ /\A[\r\n]*\z/))) # separator between header and body
- buf[i, 0] = "#{header} #{prefix}#{eol}"
-@@ -166,104 +163,96 @@ class Bsfilter
- end
-
- def x_spam_flag
-- return sprintf("X-%s-Flag:", @options["header-prefix"])
-+ return format('X-%s-Flag:', @options['header-prefix'])
- end
--
-+
- def x_spam_probability
-- return sprintf("X-%s-Probability:", @options["header-prefix"])
-+ return format('X-%s-Probability:', @options['header-prefix'])
- end
--
-+
- def x_spam_revision
-- return sprintf("X-%s-Revision:", @options["header-prefix"])
-+ return format('X-%s-Revision:', @options['header-prefix'])
- end
--
-- def insert_headers!(buf, spam_flag, probability=nil)
-+
-+ def insert_headers!(buf, spam_flag, probability = nil)
- updated = false
-- if (@options["insert-revision"])
-+ if (@options['insert-revision'])
- insert_header!(buf, x_spam_revision, "bsfilter release #{Release} revision #{Revision}")
- updated = true
- end
-- if (@options["insert-flag"])
-+ if (@options['insert-flag'])
- updated = true
-- if (spam_flag)
-- insert_header!(buf, x_spam_flag, "Yes")
-+ if spam_flag
-+ insert_header!(buf, x_spam_flag, 'Yes')
- else
-- insert_header!(buf, x_spam_flag, "No")
-+ insert_header!(buf, x_spam_flag, 'No')
- end
- end
-- if (@options["insert-probability"] && probability)
-+ if (@options['insert-probability'] && probability)
- updated = true
-- insert_header!(buf, x_spam_probability, sprintf("%f", probability))
-+ insert_header!(buf, x_spam_probability, format('%f', probability))
- end
-- if (@options["mark-spam-subject"])
-+ if (@options['mark-spam-subject'])
- updated = true
-- if (spam_flag)
-- append_header!(buf, "Subject:", @options["spam-subject-prefix"])
-- end
-+ append_header!(buf, 'Subject:', @options['spam-subject-prefix']) if spam_flag
- end
- return updated
- end
-- end # end of module
-+ end
-
- include Bsutil
-
- class DevNull
-- def sync=(*args)
-- end
-- def print(*args)
-- end
-- def printf(*args)
-- end
-+ def sync=(*args); end
-+
-+ def print(*args); end
-+
-+ def printf(*args); end
- end
-
- class DBHash < Hash
-- def flatten(magic="###", head="", &block)
-- self.each do |k, v|
-- if (v.class == DBHash)
-- if (head == "")
-+ def flatten(magic = '###', head = '', &block)
-+ each do |k, v|
-+ if v.instance_of?(DBHash)
-+ if (head == '')
- v.flatten(magic, k, &block)
- else
- v.flatten(magic, head + magic + k, &block)
- end
-+ elsif (head == '')
-+ yield k, v
- else
-- if (head == "")
-- yield k, v
-- else
-- yield head + magic + k, v
-- end
-+ yield head + magic + k, v
- end
- end
- end
--
-+
- def add(hash)
- hash.each do |k, v|
- if (self[k])
-- if ((self[k].class == DBHash) &&
-- (v.class == DBHash))
-+ if (self[k].instance_of?(DBHash) &&
-+ v.instance_of?(DBHash))
- self[k].add(v)
- else
- self[k] += v
- end
- else
-- self[k] = v # should do deep copy ?
-+ self[k] = v # should do deep copy ?
- end
- end
- end
-+
- def sub(hash)
- hash.each do |k, v|
- if (self[k])
-- if ((self[k].class == DBHash) &&
-- (v.class == DBHash))
-+ if (self[k].instance_of?(DBHash) &&
-+ v.instance_of?(DBHash))
- self[k].sub(v)
-- if (self[k].empty?)
-- self.delete(k)
-- end
-+ delete(k) if self[k].empty?
-+ elsif (self[k] > v)
-+ self[k] -= v
- else
-- if (self[k] > v)
-- self[k] -= v
-- else
-- self.delete(k)
-- end
-+ delete(k)
- end
- end
- end
-@@ -271,38 +260,38 @@ class Bsfilter
- end
-
- def safe_require(file)
-- begin
-- require file
-- return true
-- rescue LoadError
-- return false
-- end
-+ require file
-+ return true
-+ rescue LoadError
-+ return false
- end
-
- def latin2ascii(str)
- str.force_encoding('ASCII-8BIT')
- newstr = str.tr("\x92\x93\x94".force_encoding('ASCII-8BIT'), "'''")
-- newstr.tr!("\xc0-\xc5\xc8-\xcb\xcc-\xcf\xd2-\xd6\xd9-\xdc".force_encoding('ASCII-8BIT'), "AAAAAAEEEEIIIIOOOOOUUUU")
-- newstr.tr!("\xe0-\xe5\xe8-\xeb\xec-\xef\xf2-\xf6\xf9-\xfc".force_encoding('ASCII-8BIT'), "aaaaaaeeeeiiiiooooouuuu")
-+ newstr.tr!("\xc0-\xc5\xc8-\xcb\xcc-\xcf\xd2-\xd6\xd9-\xdc".force_encoding('ASCII-8BIT'), 'AAAAAAEEEEIIIIOOOOOUUUU')
-+ newstr.tr!("\xe0-\xe5\xe8-\xeb\xec-\xef\xf2-\xf6\xf9-\xfc".force_encoding('ASCII-8BIT'), 'aaaaaaeeeeiiiiooooouuuu')
- return newstr
- end
-
- def u2eucjp(str)
-- return NKF::nkf('-e -E -X -Z0', str.encode('EUC-JP', 'UTF-8', :undef => :replace, :invalid => :replace))
-+ return NKF.nkf('-e -E -X -Z0', str.encode('EUC-JP', 'UTF-8', undef: :replace, invalid: :replace))
- end
-+
- def u2latin(str)
-- return str.encode('US-ASCII', 'UTF-8', :undef => :replace, :invalid => :replace)
-+ return str.encode('US-ASCII', 'UTF-8', undef: :replace, invalid: :replace)
- end
-+
- def gb180302eucjp(str)
-- return str.encode('EUC-JP', 'BIG5', :undef => :replace, :invalid => :replace)
-+ return str.encode('EUC-JP', 'BIG5', undef: :replace, invalid: :replace)
- end
--
-+
- def open_ro(file)
-- if (file == "-")
-- fh = STDIN
-+ if (file == '-')
-+ fh = $stdin
- yield fh
-- elsif (file.class == Array)
-- file.instance_eval <<EOM
-+ elsif file.instance_of?(Array)
-+ file.instance_eval <<EOM, __FILE__, __LINE__ + 1
- @eof = false
- def gets
- @n = 0 if (! @n)
-@@ -323,66 +312,67 @@ class Bsfilter
- EOM
- yield file
- else
-- if (! FileTest::file?(file))
-- raise sprintf("%s is not file", file)
-+ if (! FileTest.file?(file))
-+ raise format('%s is not file', file)
- end
-- fh = open(file, "rb")
-+
-+ fh = File.open(file, 'rb')
- yield fh
- fh.close
- end
- end
--
-+
- def open_wo(file, &block)
-- if (file == "-")
-- fh = STDOUT
-+ if (file == '-')
-+ fh = $stdout
- else
-- fh = open(file, "wb")
-+ fh = open(file, 'wb')
- end
- if (block)
- yield fh
-- if (file != "-")
-+ if (file != '-')
- fh.close
- end
- else
- return fh
- end
- end
--
-+
- class FLOAT
-- def initialize(f=0, power=1)
-+ def initialize(f = 0, power = 1)
- @mant = 0
- @exp = 0
- set_f(f, power)
- end
- attr_accessor :mant, :exp
--
-+
- def to_f
-- return @mant * Math::exp(@exp)
-+ return @mant * Math.exp(@exp)
- end
--
-+
- def ln
-- return Math::log(@mant) + @exp
-+ return Math.log(@mant) + @exp
- end
--
-- def * (a)
-- if (a.class == FLOAT)
-- n = FLOAT::new
-+
-+ def *(a)
-+ n = FLOAT.new
-+ if a.instance_of?(FLOAT)
- n.mant = @mant * a.mant
- n.exp = @exp + a.exp
- else
-- n = FLOAT::new
- n.exp = @exp
- n.mant = @mant * a
- end
- return n
- end
-- def set_f (a, power=1)
-- if (a > 0)
-+
-+ def set_f(a, power = 1)
-+ if a.positive?
- @mant = 1
-- @exp = Math::log(a) * power
-- elsif (a < 0)
-+ @exp = Math.log(a) * power
-+ elsif a.negative?
- @mant = -1
-- @exp = Math::log(-a) * power
-+ @exp = Math.log(-a) * power
- else
- @mant = 0
- @exp = 0
-@@ -390,24 +380,24 @@ EOM
- self
- end
- end
--
--
-+
- module TokenAccess
- def check_size(max_size, min_size)
- if ((@file_count <= max_size) || (max_size <= 0) || (min_size <= 0))
- return false
- end
-+
- old_count = @file_count
-- if (@options["verbose"])
-- @options["message-fh"].printf("reduce token database %s from %d to %d\n", @filename, old_count, min_size)
-+ if (@options['verbose'])
-+ @options['message-fh'].printf("reduce token database %s from %d to %d\n", @filename, old_count, min_size)
- end
--
-+
- key_cts.each do |(category, token)|
-- if (category != ".internal")
-+ if (category != '.internal')
- v = value(category, token) || 0
- sub_scalar(category, token, (v * (old_count - min_size).to_f / old_count.to_f).ceil)
-- if (@options["debug"] && ! value(category, token))
-- @options["message-fh"].printf("deleted %s %s\n", category, token)
-+ if (@options['debug'] && ! value(category, token))
-+ @options['message-fh'].printf("deleted %s %s\n", category, token.to_utf8)
- end
- end
- end
-@@ -415,41 +405,47 @@ EOM
- @dirty = true
- return true
- end
--
-+
- def value_with_degene(category, token)
-- if (value(category, token))
-+ if value(category, token)
- return value(category, token)
-- elsif (! @options["degeneration"]) # no degeneration
-+ elsif (!@options['degeneration']) # no degeneration
- return nil
- else
-- if (v = value(category, token[0 .. -2])) # cut last char
-- return v
-+ if (v = value(category, token[0..-2])) # cut last char
-+ return v
- end
-- token = token.gsub(Regexp::compile("[#{@options['mark-in-token']}]"), '')
-+
-+ token = token.gsub(Regexp.compile("[#{@options['mark-in-token']}]"), '')
- if (v = value(category, token))
-- return v
-+ return v
- end
-+
- token = token.downcase
- if (v = value(category, token))
-- return v
-+ return v
- end
-+
- token = token.upcase
- if (v = value(category, token))
-- return v
-+ return v
- end
-+
- token = token.capitalize
- if (v = value(category, token))
-- return v
-+ return v
- end
-+
- return nil
- end
- end
-+
- def set_scalar(category, token, val)
- @dirty = true
- @file_count += 1
- set(category, token, val)
- end
--
-+
- def add_scalar(category, token, val)
- @dirty = true
- @file_count += 1
-@@ -459,58 +455,58 @@ EOM
- set(category, token, val)
- end
- end
--
-+
- def show_new_token(db)
- db.each_ct do |category, token|
-- if (! value(category, token) || (value(category, token) == 0))
-- @options["message-fh"].printf("new %s %s\n", category, token)
-+ if (!value(category, token) || value(category, token).zero?)
-+ @options['message-fh'].printf("new %s %s\n", category, token.to_utf8)
- end
- end
- end
--
-+
- def values
-- array = Array::new
-+ array = []
- each_ct do |c, t|
- array.push(value(c, t))
- end
- return array
- end
--
-+
- def key_cts
-- array = Array::new
-+ array = []
- each_ct do |c, t|
- array.push([c, t])
- end
- return array
- end
--
-+
- def export(fh)
- each_ct do |category, token|
-- fh.printf("%s %s %s %g\n", @language, category, token, value(category, token)) if (value(category, token))
-+ fh.printf("%s %s %s %g\n", @language, category, token, value(category, token)) if value(category, token)
- end
- end
- end
--
-+
- class TokenDB
- include TokenAccess
--
-- def initialize(language=nil)
-- @hash = DBHash::new
-+
-+ def initialize(language = nil)
-+ @hash = DBHash.new
- @file_count = 0
- @language = language
-- @message_id = "-"
-+ @message_id = '-'
- @probability = nil
- @spam_flag = nil
- @dirty = false
- @time = nil
-- @filename = "-"
-+ @filename = '-'
- end
- attr_accessor :hash, :file_count, :probability, :language, :spam_flag, :message_id, :time, :filename
--
-+
- def size
- @hash.size
- end
--
-+
- def each_ct
- @hash.each_key do |category|
- @hash[category].each_key do |token|
-@@ -518,9 +514,9 @@ EOM
- end
- end
- end
--
-+
- def value(category, token)
-- if (! @hash[category])
-+ if (!@hash[category])
- return nil
- elsif (v = @hash[category][token])
- return v
-@@ -528,14 +524,14 @@ EOM
- return nil
- end
- end
--
-+
- def set(category, token, v)
- @dirty = true
-- @hash[category] = DBHash::new if (! @hash[category])
-+ @hash[category] = DBHash.new if (! @hash[category])
- @hash[category][token] = v
- end
--
-- def print_keys_to_str(hash, separator, fh=STDOUT)
-+
-+ def print_keys_to_str(hash, separator, fh = $stdout)
- hash.keys.sort.each do |k|
- v = hash[k]
- v = v.to_i
-@@ -543,57 +539,49 @@ EOM
- fh.print(([k] * v).join(separator))
- end
- end
--
-+
- def clear
- @dirty = true
- @file_count = 0
-- @hash = DBHash::new
-+ @hash = DBHash.new
- end
--
-+
- def add_db(db)
- @dirty = true
- @file_count += db.file_count
-- if (! @language && db.language)
-- @language = db.language
-- end
-+ @language = db.language if (!@language && db.language)
- @hash.add(db.hash)
- end
--
-+
- def add_hash(hash)
- @dirty = true
- @file_count += 1
- @hash.add(hash)
- end
--
-+
- def sub_scalar(category, token, val)
-- if (@file_count > 0)
-- @file_count -= 1
-- end
-- @hash.sub({category => {token => val}})
-+ @file_count -= 1 if @file_count.positive?
-+ @hash.sub({ category => { token => val } })
- end
--
-+
- def sub_hash(hash)
- @dirty = true
-- if (@file_count > 0)
-- @file_count -= 1
-- end
-+ @file_count -= 1 if @file_count.positive?
- @hash.sub(hash)
- end
--
-+
- def sub_db(db)
- @dirty = true
- @file_count -= db.file_count
-- if (@file_count < 1)
-- @file_count = 1
-- end
-+ @file_count = 1 if (@file_count < 1)
- @hash.sub(db.hash)
- end
- end
--
-+
- class TokenDBM
- include TokenAccess
-- MAGIC = "###"
-- def initialize(options, language, ext)
-+ MAGIC = '###'.freeze
-+ def initialize(options, language, _ext)
- @options = options
- @dbm = nil # SDBM not Hash
- @dirty = nil # not used. for TokenAccess
-@@ -602,13 +590,13 @@ EOM
- @language = language
- end
- attr_accessor :file_count
--
-+
- def size
- @dbm.size
- end
--
-+
- def to_db
-- token_db = TokenDB::new(@language)
-+ token_db = TokenDB.new(@language)
- @dbm.each do |ct, v|
- (category, token) = ct.split(Regexp.new(MAGIC), 2)
- token_db.set(category, token, v)
-@@ -616,25 +604,25 @@ EOM
- end
- return token_db
- end
--
-+
- def clear
- @dbm.clear
- @file_count = 0
-- set(".internal", "file_count", 0)
-+ set('.internal', 'file_count', 0)
- end
--
-+
- def each_ct
- @dbm.each_key do |ct|
- (category, token) = ct.force_encoding('ASCII-8BIT').split(Regexp.new(MAGIC), 2)
- yield(category, token) if (category && token)
- end
- end
--
-+
- def add_db(token_db)
- add_hash(token_db.hash)
- @file_count += + token_db.file_count
- end
--
-+
- def add_hash(hash)
- @dirty = true
- hash.flatten(MAGIC) do |k, v|
-@@ -645,15 +633,16 @@ EOM
- end
- end
- end
--
-+
- def sub_db(token_db)
- sub_hash(token_db.hash)
- if (@file_count > token_db.file_count)
- @file_count -= token_db.file_count
- else
-- @file_count= 0
-+ @file_count = 0
- end
- end
-+
- def sub_hash(hash)
- @dirty = true
- hash.flatten(MAGIC) do |k, v|
-@@ -666,24 +655,27 @@ EOM
- end
- end
- end
--
-+
- def value(category, token)
- v = @dbm[category + MAGIC + token]
-- if (v)
-- return v.to_f
-- else
-- return nil
-- end
-+ return v.to_f if v
-+
-+ return nil
- end
--
-+
- def set(category, token, v)
- @dirty = true
-- @dbm[category + MAGIC + token] = v.to_s
-+ begin
-+ @dbm[category + MAGIC + token] = v.to_s
-+ rescue
-+ @options['message-fh'].puts($ERROR_INFO.inspect, category + MAGIC + token, v.to_s) if (@options['verbose'])
-+ @options['message-fh'].puts($ERROR_POSITION) if (@options['debug'])
-+ end
- end
--
-+
*** 4170 LINES SKIPPED ***