git: 50bb273b4288 - main - textproc/itstool: Import upstream patch and pull request
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 02 Sep 2025 18:15:22 UTC
The branch main has been updated by sunpoet:
URL: https://cgit.FreeBSD.org/ports/commit/?id=50bb273b4288f48f6ab24abca6aabc4dd564add6
commit 50bb273b4288f48f6ab24abca6aabc4dd564add6
Author: Po-Chuan Hsieh <sunpoet@FreeBSD.org>
AuthorDate: 2025-09-02 18:03:10 +0000
Commit: Po-Chuan Hsieh <sunpoet@FreeBSD.org>
CommitDate: 2025-09-02 18:10:26 +0000
textproc/itstool: Import upstream patch and pull request
- Fix Python 3.12+ warning about regular expressions [1]
- Replace py-libxml2-python with py-lxml [2]
- Bump PORTREVISION for package change
PR: 287143
Reported by: diizzy
Obtained from: https://github.com/itstool/itstool/commit/32c7d07664dc37765100285d1202d488cd6a27e8 [1]
https://github.com/itstool/itstool/pull/57 [2]
---
textproc/itstool/Makefile | 9 +-
textproc/itstool/distinfo | 4 +-
textproc/itstool/files/patch-PR18 | 88 --
textproc/itstool/files/patch-itstool.in | 52 --
textproc/itstool/files/patch-py-lxml | 1490 +++++++++++++++++++++++++++++++
5 files changed, 1499 insertions(+), 144 deletions(-)
diff --git a/textproc/itstool/Makefile b/textproc/itstool/Makefile
index 30f5f2ed34bb..eaae15ca3c38 100644
--- a/textproc/itstool/Makefile
+++ b/textproc/itstool/Makefile
@@ -1,9 +1,12 @@
PORTNAME= itstool
PORTVERSION= 2.0.7
-PORTREVISION= 2
+PORTREVISION= 3
CATEGORIES= textproc
MASTER_SITES= https://files.itstool.org/itstool/
+PATCH_SITES= https://github.com/itstool/itstool/commit/
+PATCHFILES= 32c7d07664dc37765100285d1202d488cd6a27e8.patch:-p1
+
MAINTAINER= sunpoet@FreeBSD.org
COMMENT= Translate XML with PO files using W3C Internationalization Tag Set rules
WWW= https://itstool.org/ \
@@ -12,9 +15,9 @@ WWW= https://itstool.org/ \
LICENSE= GPLv3
LICENSE_FILE= ${WRKSRC}/COPYING.GPL3
-RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}libxml2-python>=0:textproc/libxml2-python@${PY_FLAVOR}
+RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}lxml5>=0:devel/py-lxml5@${PY_FLAVOR}
-USES= python tar:bzip2
+USES= autoreconf python tar:bzip2
GNU_CONFIGURE= yes
NO_ARCH= yes
diff --git a/textproc/itstool/distinfo b/textproc/itstool/distinfo
index 6f95f612faf6..f9d5bbff5dfa 100644
--- a/textproc/itstool/distinfo
+++ b/textproc/itstool/distinfo
@@ -1,3 +1,5 @@
-TIMESTAMP = 1632582980
+TIMESTAMP = 1756517770
SHA256 (itstool-2.0.7.tar.bz2) = 6b9a7cd29a12bb95598f5750e8763cee78836a1a207f85b74d8b3275b27e87ca
SIZE (itstool-2.0.7.tar.bz2) = 104648
+SHA256 (32c7d07664dc37765100285d1202d488cd6a27e8.patch) = 4e64a2e884f9d4cbc493732fcbde9f1d5bed534f9a66330bbcc1cbeb54808c1e
+SIZE (32c7d07664dc37765100285d1202d488cd6a27e8.patch) = 3095
diff --git a/textproc/itstool/files/patch-PR18 b/textproc/itstool/files/patch-PR18
deleted file mode 100644
index b4cafecdb0b1..000000000000
--- a/textproc/itstool/files/patch-PR18
+++ /dev/null
@@ -1,88 +0,0 @@
-# https://github.com/itstool/itstool/pull/18
-# https://github.com/itstool/itstool/issues/17
-
-From 98d04cdabf1721cb541ecd234c975f13fde4fa41 Mon Sep 17 00:00:00 2001
-From: Guido Trentalancia <guido@trentalancia.com>
-Date: Wed, 1 Nov 2017 18:20:36 +0100
-Subject: [PATCH 1/2] Revert "Be more careful about libxml2 memory management"
-
-This reverts commit 9b84c007a73e8275ca45762f1bfa3ab7c3a852e2.
----
- itstool.in | 13 ++-----------
- 1 file changed, 2 insertions(+), 11 deletions(-)
-
-diff --git itstool.in itstool.in
-index a16eba9..c1d0585 100755
---- itstool.in
-+++ itstool.in
-@@ -477,7 +477,6 @@ class Document (object):
- if load_dtd:
- ctxt.loadSubset(1)
- if keep_entities:
-- ctxt.loadSubset(1)
- ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
- ctxt.replaceEntities(0)
- else:
-@@ -1044,7 +1043,6 @@ class Document (object):
- if self._load_dtd:
- ctxt.loadSubset(1)
- if self._keep_entities:
-- ctxt.loadSubset(1)
- ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
- ctxt.replaceEntities(0)
- else:
-@@ -1071,9 +1069,7 @@ class Document (object):
- ph_node = msg.get_placeholder(child.name).node
- if self.has_child_elements(ph_node):
- self.merge_translations(translations, None, ph_node, strict=strict)
-- newnode = ph_node.copyNode(1)
-- newnode.setTreeDoc(self._doc)
-- child.replaceNode(newnode)
-+ child.replaceNode(ph_node)
- else:
- repl = self.get_translated(ph_node, translations, strict=strict, lang=lang)
- child.replaceNode(repl)
-@@ -1088,15 +1084,10 @@ class Document (object):
- (lang + ' ') if lang is not None else '',
- msgstr.encode('utf-8')))
- self._xml_err = ''
-- ctxt.doc().freeDoc()
- return node
- retnode = node.copyNode(2)
-- retnode.setTreeDoc(self._doc)
- for child in xml_child_iter(trnode):
-- newnode = child.copyNode(1)
-- newnode.setTreeDoc(self._doc)
-- retnode.addChild(newnode)
--
-+ retnode.addChild(child.copyNode(1))
- ctxt.doc().freeDoc()
- return retnode
-
-
-From 1549b6d12eb2f35e5c7f1b1856c21768e92ba794 Mon Sep 17 00:00:00 2001
-From: Guido Trentalancia <guido@trentalancia.com>
-Date: Wed, 1 Nov 2017 18:23:44 +0100
-Subject: [PATCH 2/2] Fix a segmentation fault bug introduced with version
- 2.0.4.
-
-https://github.com/itstool/itstool/issues/17
-
-This fix seems a lot easier than the previous reverted commit.
----
- itstool.in | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git itstool.in itstool.in
-index c1d0585..e492e95 100755
---- itstool.in
-+++ itstool.in
-@@ -1048,7 +1048,7 @@ class Document (object):
- else:
- ctxt.replaceEntities(1)
- ctxt.parseDocument()
-- trnode = ctxt.doc().getRootElement()
-+ trnode = ctxt.doc().getRootElement().copyNode(1)
- try:
- self._check_errors()
- except libxml2.parserError:
diff --git a/textproc/itstool/files/patch-itstool.in b/textproc/itstool/files/patch-itstool.in
deleted file mode 100644
index ea6ed79c3781..000000000000
--- a/textproc/itstool/files/patch-itstool.in
+++ /dev/null
@@ -1,52 +0,0 @@
-# Workaround https://github.com/itstool/itstool/issues/25
-# Obtained from Fedora
-
---- itstool.in.orig 2018-08-21 15:27:24 UTC
-+++ itstool.in
-@@ -44,9 +44,22 @@ if PY3:
- else:
- return str(s)
- ustr_type = str
-+ def pr_str(s):
-+ """Return a string that can be safely print()ed"""
-+ # Since print works on both bytes and unicode, just return the argument
-+ return s
- else:
- string_types = basestring,
- ustr = ustr_type = unicode
-+ def pr_str(s):
-+ """Return a string that can be safely print()ed"""
-+ if isinstance(s, str):
-+ # Since print works on str, just return the argument
-+ return s
-+ else:
-+ # print may not work on unicode if the output encoding cannot be
-+ # detected, so just encode with UTF-8
-+ return unicode.encode(s, 'utf-8')
-
- NS_ITS = 'http://www.w3.org/2005/11/its'
- NS_ITST = 'http://itstool.org/extensions/'
-@@ -1060,9 +1073,9 @@ class Document (object):
- if strict:
- raise
- else:
-- sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
-+ sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
- (lang + ' ') if lang is not None else '',
-- msgstr.encode('utf-8')))
-+ msgstr)))
- self._xml_err = ''
- return node
- def scan_node(node):
-@@ -1087,9 +1100,9 @@ class Document (object):
- if strict:
- raise
- else:
-- sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
-+ sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
- (lang + ' ') if lang is not None else '',
-- msgstr.encode('utf-8')))
-+ msgstr)))
- self._xml_err = ''
- ctxt.doc().freeDoc()
- return node
diff --git a/textproc/itstool/files/patch-py-lxml b/textproc/itstool/files/patch-py-lxml
new file mode 100644
index 000000000000..897eaf6349d7
--- /dev/null
+++ b/textproc/itstool/files/patch-py-lxml
@@ -0,0 +1,1490 @@
+Obtained from: https://github.com/itstool/itstool/pull/57
+
+--- configure.ac.orig 2021-09-25 15:09:48 UTC
++++ configure.ac
+@@ -12,7 +12,7 @@ AM_PATH_PYTHON([2.6])
+
+ AM_PATH_PYTHON([2.6])
+
+-py_module=libxml2
++py_module=lxml
+ AC_MSG_CHECKING(for python module $py_module)
+ echo "import $py_module" | $PYTHON - &>/dev/null
+ if test $? -ne 0; then
+--- itstool.in.orig 2025-08-30 01:59:59 UTC
++++ itstool.in
+@@ -24,7 +24,8 @@ import hashlib
+
+ import gettext
+ import hashlib
+-import libxml2
++from copy import deepcopy
++from lxml import etree
+ import optparse
+ import os
+ import os.path
+@@ -190,7 +191,7 @@ class Placeholder (object):
+ class Placeholder (object):
+ def __init__ (self, node):
+ self.node = node
+- self.name = ustr(node.name, 'utf-8')
++ self.name = ustr(xml_localname(node), 'utf-8')
+
+
+ class Message (object):
+@@ -243,32 +244,30 @@ class Message (object):
+ def add_start_tag (self, node):
+ if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)):
+ self._message.append('')
+- if node.ns() is not None and node.ns().name is not None:
+- self._message[-1] += ('<%s:%s' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
+- else:
+- self._message[-1] += ('<%s' % ustr(node.name, 'utf-8'))
+- for prop in xml_attr_iter(node):
+- name = prop.name
+- if prop.ns() is not None:
+- name = prop.ns().name + ':' + name
+- atval = prop.content
++ self._message[-1] += ('<%s' % ustr(xml_qname(node), 'utf-8'))
++ for name, atval in node.items():
++ qname = etree.QName(name)
++ if qname.namespace is not None:
++ # lxml doesn't expose the prefix of attributes, so we use
++ # an XPath expression to get the attribute's prefixed name.
++ # This is horribly inefficient.
++ expr = 'name(@*[local-name()="%s" and namespace-uri()="%s"])' % (
++ qname.localname, qname.namespace)
++ name = node.xpath(expr)
+ if not isinstance(atval, ustr_type):
+ atval = ustr(atval, 'utf-8')
+ atval = atval.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
+ self._message += " %s=\"%s\"" % (name, atval)
+- if node.children is not None:
++ if len(node) > 0 or node.text:
+ self._message[-1] += '>'
+ else:
+ self._message[-1] += '/>'
+
+ def add_end_tag (self, node):
+- if node.children is not None:
++ if len(node) > 0 or node.text:
+ if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)):
+ self._message.append('')
+- if node.ns() is not None and node.ns().name is not None:
+- self._message[-1] += ('</%s:%s>' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
+- else:
+- self._message[-1] += ('</%s>' % ustr(node.name, 'utf-8'))
++ self._message[-1] += ('</%s>' % ustr(xml_qname(node), 'utf-8'))
+
+ def is_empty (self):
+ return self._empty
+@@ -379,69 +378,86 @@ class Message (object):
+ return ret
+
+
+-def xml_child_iter (node):
+- child = node.children
+- while child is not None:
+- yield child
+- child = child.next
++def xml_localname (node):
++ return etree.QName(node.tag).localname
+
+-def xml_attr_iter (node):
+- attr = node.get_properties()
+- while attr is not None:
+- yield attr
+- attr = attr.next
++def xml_qname (node):
++ qname = etree.QName(node.tag).localname
++ if node.prefix is not None:
++ qname = node.prefix + ':' + qname
++ return qname
+
+-def xml_is_ns_name (node, ns, name):
+- if node.type != 'element':
+- return False
+- return node.name == name and node.ns() is not None and node.ns().content == ns
++def xml_content (node):
++ if isinstance(node, string_types):
++ return node
++ if isinstance(node, XMLAttr):
++ return node.parent.get(node.tag)
++ return etree.tostring(node, method='text', encoding='unicode')
+
++def xml_delete_node (node):
++ parent = node.getparent()
++ prev = node.getprevious()
++ tail = node.tail
++ if parent is not None:
++ parent.remove(node)
++ if prev is not None:
++ if prev.tail is None or re.fullmatch(r'\s+', prev.tail):
++ prev.tail = tail
++ else:
++ prev.tail += tail
++ elif parent is not None:
++ if parent.text is None or re.fullmatch(r'\s+', parent.text):
++ parent.text = tail
++ else:
++ parent.text += tail
++
+ def xml_get_node_path(node):
+ # The built-in nodePath() method only does numeric indexes
+ # when necessary for disambiguation. For various reasons,
+ # we prefer always using indexes.
+- name = node.name
+- if node.ns() is not None and node.ns().name is not None:
+- name = node.ns().name + ':' + name
+- if node.type == 'attribute':
++ name = xml_qname(node)
++ if isinstance(node, XMLAttr):
+ name = '@' + name
+ name = '/' + name
+- if node.type == 'element' and node.parent.type == 'element':
++ if node.getparent() is not None:
+ count = 1
+- prev = node.previousElementSibling()
++ prev = node.getprevious()
+ while prev is not None:
+- if prev.name == node.name:
+- if prev.ns() is None:
+- if node.ns() is None:
+- count += 1
+- else:
+- if node.ns() is not None:
+- if prev.ns().name == node.ns().name:
+- count += 1
+- prev = prev.previousElementSibling()
++ if prev.tag == node.tag:
++ count += 1
++ prev = prev.getprevious()
+ name = '%s[%i]' % (name, count)
+- if node.parent.type == 'element':
+- name = xml_get_node_path(node.parent) + name
++ name = xml_get_node_path(node.getparent()) + name
+ return name
+
+-def xml_error_catcher(doc, error):
+- doc._xml_err += " %s" % error
+
+-def fix_node_ns (node, nsdefs):
+- childnsdefs = nsdefs.copy()
+- nsdef = node.nsDefs()
+- while nsdef is not None:
+- nextnsdef = nsdef.next
+- if nsdef.name in nsdefs and nsdefs[nsdef.name] == nsdef.content:
+- node.removeNsDef(nsdef.content)
+- else:
+- childnsdefs[nsdef.name] = nsdef.content
+- nsdef = nextnsdef
+- for child in xml_child_iter(node):
+- if child.type == 'element':
+- fix_node_ns(child, childnsdefs)
++# lxml doesn't support attribute nodes, so we have to emulate them.
++class XMLAttr (object):
++ def __init__(self, element, tag):
++ self.parent = element
++ self.tag = tag
++ self.attrib = {}
++ self.sourceline = element.sourceline
+
++ def __repr__(self):
++ return '%s@%s' % (repr(self.parent), self.tag)
+
++ def __eq__(self, other):
++ return other and self.parent == other.parent and self.tag == other.tag
++
++ def __ne__(self, other):
++ return not self.__eq__(other)
++
++ def __hash__(self):
++ return hash(repr(self))
++
++ def getparent(self):
++ return self.parent
++
++ def get(self, default=None):
++ return default
++
++
+ class LocNote (object):
+ def __init__(self, locnote=None, locnoteref=None, locnotetype=None, space=False):
+ self.locnote = locnote
+@@ -464,82 +480,51 @@ class Document (object):
+
+ class Document (object):
+ def __init__ (self, filename, messages, load_dtd=False, keep_entities=False):
+- self._xml_err = ''
+- libxml2.registerErrorHandler(xml_error_catcher, self)
+- try:
+- ctxt = libxml2.createFileParserCtxt(filename)
+- except:
+- sys.stderr.write('Error: cannot open XML file %s\n' % filename)
+- sys.exit(1)
+- ctxt.lineNumbers(1)
+ self._load_dtd = load_dtd
+ self._keep_entities = keep_entities
+- if load_dtd:
+- ctxt.loadSubset(1)
+- if keep_entities:
+- ctxt.loadSubset(1)
+- ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
+- ctxt.replaceEntities(0)
+- else:
+- ctxt.replaceEntities(1)
+- ctxt.parseDocument()
++ parser = etree.XMLParser(load_dtd = load_dtd or keep_entities,
++ resolve_entities = not(keep_entities))
++ doc = etree.parse(filename, parser)
++ doc.xinclude()
+ self._filename = filename
+- self._doc = ctxt.doc()
++ self._doc = doc
+ self._localrules = []
+- def pre_process (node):
+- for child in xml_child_iter(node):
+- if xml_is_ns_name(child, 'http://www.w3.org/2001/XInclude', 'include'):
+- if child.nsProp('parse', None) == 'text':
+- child.xincludeProcessTree()
+- elif xml_is_ns_name(child, NS_ITS, 'rules'):
+- if child.hasNsProp('href', NS_XLINK):
+- href = child.nsProp('href', NS_XLINK)
+- fileref = os.path.join(os.path.dirname(filename), href)
+- if not os.path.exists(fileref):
+- if opts.itspath is not None:
+- for pathdir in opts.itspath:
+- fileref = os.path.join(pathdir, href)
+- if os.path.exists(fileref):
+- break
+- if not os.path.exists(fileref):
+- sys.stderr.write('Error: Could not locate ITS file %s\n' % href)
+- sys.exit(1)
+- hctxt = libxml2.createFileParserCtxt(fileref)
+- hctxt.replaceEntities(1)
+- hctxt.parseDocument()
+- root = hctxt.doc().getRootElement()
+- version = None
+- if root.hasNsProp('version', None):
+- version = root.nsProp('version', None)
+- else:
+- sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
+- os.path.basename(href))
+- if version is not None and version not in ('1.0', '2.0'):
+- sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
+- (os.path.basename(href), root.nsProp('version', None)))
+- else:
+- self._localrules.append(root)
++ for child in doc.iter():
++ if child.tag == '{' + NS_ITS + '}rules':
++ href = child.get('{' + NS_XLINK + '}href')
++ if href is not None:
++ fileref = os.path.join(os.path.dirname(filename), href)
++ if not os.path.exists(fileref):
++ if opts.itspath is not None:
++ for pathdir in opts.itspath:
++ fileref = os.path.join(pathdir, href)
++ if os.path.exists(fileref):
++ break
++ if not os.path.exists(fileref):
++ sys.stderr.write('Error: Could not locate ITS file %s\n' % href)
++ sys.exit(1)
++ root = etree.parse(fileref).getroot()
+ version = None
+- if child.hasNsProp('version', None):
+- version = child.nsProp('version', None)
++ version = root.get('version')
++ if version is None:
++ sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
++ os.path.basename(href))
++ elif version not in ('1.0', '2.0'):
++ sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
++ (os.path.basename(href), root.get('version')))
+ else:
+- root = child.doc.getRootElement()
+- if root.hasNsProp('version', NS_ITS):
+- version = root.nsProp('version', NS_ITS)
+- else:
+- sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
+- if version is not None and version not in ('1.0', '2.0'):
+- sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
+- version)
+- else:
+- self._localrules.append(child)
+- pre_process(child)
+- pre_process(self._doc)
+- try:
+- self._check_errors()
+- except libxml2.parserError as e:
+- sys.stderr.write('Error: Could not parse document:\n%s\n' % ustr(e))
+- sys.exit(1)
++ self._localrules.append(root)
++ version = child.get('version')
++ if version is None:
++ root = child.getroottree()
++ version = root.get('{' + NS_ITS + '}version')
++ if version is None:
++ sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
++ elif version not in ('1.0', '2.0'):
++ sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
++ version)
++ else:
++ self._localrules.append(child)
+ self._msgs = messages
+ self._its_translate_nodes = {}
+ self._its_within_text_nodes = {}
+@@ -556,13 +541,6 @@ class Document (object):
+
+ self._clear_cache()
+
+- def __del__ (self):
+- self._doc.freeDoc()
+-
+- def _check_errors(self):
+- if self._xml_err:
+- raise libxml2.parserError(self._xml_err)
+-
+ def _clear_cache(self):
+ self._its_translate_nodes_cache = {}
+ self._its_locale_filters_cache = {}
+@@ -570,123 +548,107 @@ class Document (object):
+
+ def get_its_params(self, rules):
+ params = {}
+- for child in xml_child_iter(rules):
+- if xml_is_ns_name(child, NS_ITS, 'param'):
+- params[child.nsProp('name', None)] = child.getContent()
++ for child in rules.iterchildren():
++ if child.tag == '{' + NS_ITS + '}param':
++ params[child.get('name')] = xml_content(child)
+ return params
+
+- def register_its_params(self, xpath, params, userparams={}):
+- for param in params:
+- if param in userparams:
+- xpath.xpathRegisterVariable(name, None, userparams[param])
++ def register_its_params(self, var, params, userparams={}):
++ for name in params:
++ if name in userparams:
++ var[name] = userparams[name]
+ else:
+- xpath.xpathRegisterVariable(name, None, params[param])
++ var[name] = params[name]
+
+ def apply_its_rule(self, rule, xpath):
+ self._clear_cache()
+- if rule.type != 'element':
+- return
+- if xml_is_ns_name(rule, NS_ITS, 'translateRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- self._its_translate_nodes[node] = rule.nsProp('translate', None)
+- elif xml_is_ns_name(rule, NS_ITS, 'withinTextRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- self._its_within_text_nodes[node] = rule.nsProp('withinText', None)
+- elif xml_is_ns_name(rule, NS_ITST, 'preserveSpaceRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- val = rule.nsProp('preserveSpace', None)
++ if rule.tag == '{' + NS_ITS + '}translateRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ self._its_translate_nodes[node] = rule.get('translate')
++ elif rule.tag == '{' + NS_ITS + '}withinTextRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ self._its_within_text_nodes[node] = rule.get('withinText')
++ elif rule.tag == '{' + NS_ITST + '}preserveSpaceRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ val = rule.get('preserveSpace')
+ if val == 'yes':
+ self._its_preserve_space_nodes[node] = 'preserve'
+- elif xml_is_ns_name(rule, NS_ITS, 'preserveSpaceRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- self._its_preserve_space_nodes[node] = rule.nsProp('space', None)
+- elif xml_is_ns_name(rule, NS_ITS, 'localeFilterRule'):
+- if rule.nsProp('selector', None) is not None:
+- if rule.hasNsProp('localeFilterList', None):
+- lst = rule.nsProp('localeFilterList', None)
+- else:
+- lst = '*'
+- if rule.hasNsProp('localeFilterType', None):
+- typ = rule.nsProp('localeFilterType', None)
+- else:
+- typ = 'include'
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
++ elif rule.tag == '{' + NS_ITS + '}preserveSpaceRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ self._its_preserve_space_nodes[node] = rule.get('space')
++ elif rule.tag == '{' + NS_ITS + '}localeFilterRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ lst = rule.get('localeFilterList', '*')
++ typ = rule.get('localeFilterType', 'include')
++ for node in self._try_xpath_eval(xpath, sel):
+ self._its_locale_filters[node] = (lst, typ)
+- elif xml_is_ns_name(rule, NS_ITST, 'dropRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- self._itst_drop_nodes[node] = rule.nsProp('drop', None)
+- elif xml_is_ns_name(rule, NS_ITS, 'idValueRule'):
+- sel = rule.nsProp('selector', None)
+- idv = rule.nsProp('idValue', None)
++ elif rule.tag == '{' + NS_ITST + '}dropRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ self._itst_drop_nodes[node] = rule.get('drop')
++ elif rule.tag == '{' + NS_ITS + '}idValueRule':
++ sel = rule.get('selector')
++ idv = rule.get('idValue')
+ if sel is not None and idv is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- idvalue = self._try_xpath_eval(xpath, idv)
++ idvalue = self._try_xpath_eval(xpath, idv, node=node)
+ if isinstance(idvalue, string_types):
+ self._its_id_values[node] = idvalue
+ else:
+ for val in idvalue:
+- self._its_id_values[node] = val.content
++ self._its_id_values[node] = xml_content(val)
+ break
+- xpath.setContextNode(oldnode)
+ pass
+- elif xml_is_ns_name(rule, NS_ITST, 'contextRule'):
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- if rule.hasNsProp('context', None):
+- self._itst_contexts[node] = rule.nsProp('context', None)
+- elif rule.hasNsProp('contextPointer', None):
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- ctxt = self._try_xpath_eval(xpath, rule.nsProp('contextPointer', None))
++ elif rule.tag == '{' + NS_ITST + '}contextRule':
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ ctxt = rule.get('context')
++ cp = rule.get('contextPointer')
++ if ctxt is not None:
++ self._itst_contexts[node] = ctxt
++ elif cp is not None:
++ ctxt = self._try_xpath_eval(xpath, cp, node=node)
+ if isinstance(ctxt, string_types):
+ self._itst_contexts[node] = ctxt
+ else:
+ for ctxt in ctxt:
+- self._itst_contexts[node] = ctxt.content
++ self._itst_contexts[node] = xml_content(ctxt)
+ break
+- xpath.setContextNode(oldnode)
+- elif xml_is_ns_name(rule, NS_ITS, 'locNoteRule'):
++ elif rule.tag == '{' + NS_ITS + '}locNoteRule':
+ locnote = None
+- notetype = rule.nsProp('locNoteType', None)
+- for child in xml_child_iter(rule):
+- if xml_is_ns_name(child, NS_ITS, 'locNote'):
+- locnote = LocNote(locnote=child.content, locnotetype=notetype)
+- break
++ notetype = rule.get('locNoteType')
++ for child in rule.iterchildren('{' + NS_ITS + '}locNote'):
++ locnote = LocNote(locnote=xml_content(child), locnotetype=notetype)
++ break
+ if locnote is None:
+- if rule.hasNsProp('locNoteRef', None):
+- locnote = LocNote(locnoteref=rule.nsProp('locNoteRef', None), locnotetype=notetype)
+- if rule.nsProp('selector', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
++ if 'locNoteRef' in rule.attrib:
++ locnote = LocNote(locnoteref=rule.get('locNoteRef'), locnotetype=notetype)
++ sel = rule.get('selector')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
+ if locnote is not None:
+ self._its_loc_notes.setdefault(node, []).append(locnote)
+ else:
+- if rule.hasNsProp('locNotePointer', None):
+- sel = rule.nsProp('locNotePointer', None)
++ if 'locNotePointer' in rule.attrib:
++ sel = rule.get('locNotePointer')
+ ref = False
+- elif rule.hasNsProp('locNoteRefPointer', None):
+- sel = rule.nsProp('locNoteRefPointer', None)
++ elif 'locNoteRefPointer' in rule.attrib:
++ sel = rule.get('locNoteRefPointer')
+ ref = True
+ else:
+ continue
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- note = self._try_xpath_eval(xpath, sel)
++ note = self._try_xpath_eval(xpath, sel, node=node)
+ if isinstance(note, string_types):
+ if ref:
+ nodenote = LocNote(locnoteref=note, locnotetype=notetype)
+@@ -695,55 +657,56 @@ class Document (object):
+ self._its_loc_notes.setdefault(node, []).append(nodenote)
+ else:
+ for note in note:
++ text = xml_content(note)
+ if ref:
+- nodenote = LocNote(locnoteref=note.content, locnotetype=notetype)
++ nodenote = LocNote(locnoteref=text, locnotetype=notetype)
+ else:
+- nodenote = LocNote(locnote=note.content, locnotetype=notetype,
++ nodenote = LocNote(locnote=text, locnotetype=notetype,
+ space=self.get_preserve_space(note))
+ self._its_loc_notes.setdefault(node, []).append(nodenote)
+ break
+- xpath.setContextNode(oldnode)
+- elif xml_is_ns_name(rule, NS_ITS, 'langRule'):
+- if rule.nsProp('selector', None) is not None and rule.nsProp('langPointer', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- res = self._try_xpath_eval(xpath, rule.nsProp('langPointer', None))
++ elif rule.tag == '{' + NS_ITS + '}langRule':
++ sel = rule.get('selector')
++ lp = rule.get('langPointer')
++ if sel is not None and lp is not None:
++ for node in self._try_xpath_eval(xpath, sel):
++ res = self._try_xpath_eval(xpath, lp, node=node)
+ if len(res) > 0:
+- self._its_lang[node] = res[0].content
++ self._its_lang[node] = xml_content(res[0])
+ # We need to construct language attributes, not just read
+ # language information. Technically, langPointer could be
+ # any XPath expression. But if it looks like an attribute
+ # accessor, just use the attribute name.
+- if rule.nsProp('langPointer', None)[0] == '@':
+- self._itst_lang_attr[node] = rule.nsProp('langPointer', None)[1:]
+- xpath.setContextNode(oldnode)
+- elif xml_is_ns_name(rule, NS_ITST, 'credits'):
+- if rule.nsProp('appendTo', None) is not None:
+- for node in self._try_xpath_eval(xpath, rule.nsProp('appendTo', None)):
++ # TODO: This should probably be skipped if langPointer
++ # equals '@xml:lang' which is the default.
++ if lp[0] == '@':
++ name = lp[1:]
++ if ':' in name:
++ prefix, lname = name.split(':', 2)
++ nsuri = node.nsmap.get(prefix)
++ if nsuri is None:
++ name = lname
++ else:
++ name = '{' + nsuri + '}' + lname
++ self._itst_lang_attr[node] = name
++ elif rule.tag == '{' + NS_ITST + '}credits':
++ sel = rule.get('appendTo')
++ if sel is not None:
++ for node in self._try_xpath_eval(xpath, sel):
+ self._itst_credits = (node, rule)
+ break
+- elif (xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule') or
+- xml_is_ns_name(rule, NS_ITST, 'externalRefRule')):
+- sel = rule.nsProp('selector', None)
+- if xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule'):
+- ptr = rule.nsProp('externalResourceRefPointer', None)
++ elif (rule.tag == '{' + NS_ITS + '}externalResourceRefRule' or
++ rule.tag == '{' + NS_ITST + '}externalRefRule'):
++ sel = rule.get('selector')
++ if rule.tag == '{' + NS_ITS + '}externalResourceRefRule':
++ ptr = rule.get('externalResourceRefPointer')
+ else:
+- ptr = rule.nsProp('refPointer', None)
++ ptr = rule.get('refPointer')
+ if sel is not None and ptr is not None:
+ for node in self._try_xpath_eval(xpath, sel):
+- try:
+- oldnode = xpath.contextNode()
+- except:
+- oldnode = None
+- xpath.setContextNode(node)
+- res = self._try_xpath_eval(xpath, ptr)
++ res = self._try_xpath_eval(xpath, ptr, node=node)
+ if len(res) > 0:
+- self._its_externals[node] = res[0].content
+- xpath.setContextNode(oldnode)
++ self._its_externals[node] = xml_content(res[0])
+
+ def apply_its_rules(self, builtins, userparams={}):
+ self._clear_cache()
+@@ -773,94 +736,59 @@ class Document (object):
+
+ def apply_its_file(self, filename, userparams={}):
+ self._clear_cache()
+- doc = libxml2.parseFile(filename)
+- root = doc.getRootElement()
+- if not xml_is_ns_name(root, NS_ITS, 'rules'):
++ parser = etree.XMLParser(resolve_entities = False)
++ root = etree.parse(filename, parser).getroot()
++ if root.tag != '{' + NS_ITS + '}rules':
+ return
+- version = None
+- if root.hasNsProp('version', None):
+- version = root.nsProp('version', None)
+- else:
++ version = root.get('version')
++ if version is None:
+ sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
+ os.path.basename(filename))
+- if version is not None and version not in ('1.0', '2.0'):
++ elif version not in ('1.0', '2.0'):
+ sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
+- (os.path.basename(filename), root.nsProp('version', None)))
++ (os.path.basename(filename), root.get('version')))
+ return
+ matched = True
+- for match in xml_child_iter(root):
+- if xml_is_ns_name(match, NS_ITST, 'match'):
++ for match in root.iterchildren():
++ if match.tag == '{' + NS_ITST + '}match':
+ matched = False
+- xpath = self._doc.xpathNewContext()
+- par = match
+- nss = {}
+- while par is not None:
+- nsdef = par.nsDefs()
+- while nsdef is not None:
+- if nsdef.name is not None:
+- if nsdef.name not in nss:
+- nss[nsdef.name] = nsdef.content
+- xpath.xpathRegisterNs(nsdef.name, nsdef.content)
+- nsdef = nsdef.next
+- par = par.parent
+- if match.hasNsProp('selector', None):
+- if len(self._try_xpath_eval(xpath, match.nsProp('selector', None))) > 0:
++ sel = match.get('selector')
++ if sel is not None:
++ ns = { k: v for k, v in match.nsmap.items() if k is not None }
++ xpath = (ns, {})
++ if len(self._try_xpath_eval(xpath, sel)) > 0:
+ matched = True
+ break
+ if matched == False:
+ return
++ ns = { k: v for k, v in match.nsmap.items() if k is not None }
++ var = {}
+ params = self.get_its_params(root)
+- for rule in xml_child_iter(root):
+- xpath = self._doc.xpathNewContext()
+- par = match
+- nss = {}
+- while par is not None:
+- nsdef = par.nsDefs()
+- while nsdef is not None:
+- if nsdef.name is not None:
+- if nsdef.name not in nss:
+- nss[nsdef.name] = nsdef.content
+- xpath.xpathRegisterNs(nsdef.name, nsdef.content)
+- nsdef = nsdef.next
+- par = par.parent
+- self.register_its_params(xpath, params, userparams=userparams)
++ self.register_its_params(var, params, userparams=userparams)
++ xpath = (ns, var)
++ for rule in root.iterchildren():
+ self.apply_its_rule(rule, xpath)
+
+ def apply_local_its_rules(self, userparams={}):
+ self._clear_cache()
+ for rules in self._localrules:
+- def reg_ns(xpath, node):
+- if node.parent is not None:
+- reg_ns(xpath, node.parent)
+- nsdef = node.nsDefs()
+- while nsdef is not None:
+- if nsdef.name is not None:
+- xpath.xpathRegisterNs(nsdef.name, nsdef.content)
+- nsdef = nsdef.next
+- xpath = self._doc.xpathNewContext()
+- reg_ns(xpath, rules)
++ var = {}
+ params = self.get_its_params(rules)
+- self.register_its_params(xpath, params, userparams=userparams)
+- for rule in xml_child_iter(rules):
+- if rule.type != 'element':
+- continue
+- if rule.nsDefs() is not None:
+- rule_xpath = self._doc.xpathNewContext()
+- reg_ns(rule_xpath, rule)
+- self.register_its_params(rule_xpath, params, userparams=userparams)
+- else:
+- rule_xpath = xpath
++ self.register_its_params(var, params, userparams=userparams)
++ for rule in rules.iterchildren():
++ ns = { k: v for k, v in rule.nsmap.items() if k is not None }
++ rule_xpath = (ns, var)
+ self.apply_its_rule(rule, rule_xpath)
+
+ def _append_credits(self, parent, node, trdata):
+- if xml_is_ns_name(node, NS_ITST, 'for-each'):
+- select = node.nsProp('select', None)
++ if node.tag == '{' + NS_ITST + '}for-each':
++ select = node.get('select')
+ if select == 'years':
+ for year in trdata[2].split(','):
+- for child in xml_child_iter(node):
++ for child in node.iterchildren():
+ self._append_credits(parent, child, trdata + (year.strip(),))
+- elif xml_is_ns_name(node, NS_ITST, 'value-of'):
+- select = node.nsProp('select', None)
++ elif node.tag == '{' + NS_ITST + '}value-of':
++ select = node.get('select')
+ val = None
+ if select == 'name':
+ val = trdata[0]
+@@ -873,11 +801,20 @@ class Document (object):
+ if val is not None:
*** 731 LINES SKIPPED ***