From c2e1824671747030234dab895400699cbc1d99be Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Sun, 15 Mar 2015 20:04:37 -0400 Subject: [PATCH 1/2] Add python 3 support --- itstool.in | 180 +++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 103 insertions(+), 77 deletions(-) diff --git a/itstool.in b/itstool.in index d0e76a1..1a59a5a 100755 --- a/itstool.in +++ b/itstool.in @@ -16,6 +16,8 @@ # with ITS Tool; if not, write to the Free Software Foundation, 59 Temple # Place, Suite 330, Boston, MA 0211-1307 USA. # +from __future__ import print_function +from __future__ import unicode_literals VERSION="@VERSION@" DATADIR="@DATADIR@" @@ -29,6 +31,22 @@ import os.path import re import sys import time +import io + +PY3 = sys.version_info[0] == 3 +if PY3: + string_types = str, + def ustr(s, encoding=None): + if isinstance(s, str): + return s + elif encoding: + return str(s, encoding) + else: + return str(s) + ustr_type = str +else: + string_types = basestring, + ustr = ustr_type = unicode NS_ITS = 'http://www.w3.org/2005/11/its' NS_ITST = 'http://itstool.org/extensions/' @@ -81,14 +99,14 @@ class MessageList (object): return self._by_node.get(node, None) def get_nodes_with_messages (self): - return self._by_node.keys() + return list(self._by_node.keys()) def output (self, out): msgs = [] msgdict = {} for msg in self._messages: key = (msg.get_context(), msg.get_string()) - if msgdict.has_key(key): + if key in msgdict: for source in msg.get_sources(): msgdict[key].add_source(source) for marker in msg.get_markers(): @@ -105,7 +123,7 @@ class MessageList (object): msgdict[key].set_locale_filter('%s, %s' % (locale, msg.get_locale_filter())) else: msgdict[key].set_locale_filter(msg.get_locale_filter()) - + else: msgs.append(msg) msgdict[key] = msg @@ -121,13 +139,13 @@ class MessageList (object): out.write('"Content-Transfer-Encoding: 8bit\\n"\n') out.write('\n') for msg in msgs: - out.write(msg.format().encode('utf-8')) + out.write(msg.format()) 
out.write('\n') class Comment (object): def __init__ (self, text): - self._text = str(text) + self._text = ustr(text) assert(text is not None) self._markers = [] @@ -141,10 +159,10 @@ class Comment (object): return self._text def format (self): - ret = u'' + ret = '' markers = {} for marker in self._markers: - if not markers.has_key(marker): + if marker not in markers: ret += '#. (itstool) comment: ' + marker + '\n' markers[marker] = marker if '\n' in self._text: @@ -154,7 +172,7 @@ class Comment (object): doadd = True if not doadd: continue - ret += u'#. %s\n' % line + ret += '#. %s\n' % line else: text = self._text while len(text) > 72: @@ -163,7 +181,7 @@ class Comment (object): j = text.find(' ') if j == -1: break - ret += u'#. %s\n' % text[:j] + ret += '#. %s\n' % text[:j] text = text[j+1:] ret += '#. %s\n' % text return ret @@ -190,16 +208,16 @@ class Message (object): class Placeholder (object): def __init__ (self, node): self.node = node - self.name = unicode(node.name, 'utf-8') + self.name = ustr(node.name, 'utf-8') def escape (self, text): return text.replace('\\','\\\\').replace('"', "\\\"").replace("\n","\\n").replace("\t","\\t") def add_text (self, text): - if len(self._message) == 0 or not(isinstance(self._message[-1], basestring)): + if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)): self._message.append('') - if not isinstance(text, unicode): - text = unicode(text, 'utf-8') + if not isinstance(text, ustr_type): + text = ustr(text, 'utf-8') self._message[-1] += text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') if re.sub('\s+', ' ', text).strip() != '': self._empty = False @@ -216,25 +234,25 @@ class Message (object): def get_placeholder (self, name): placeholder = 1 for holder in self._placeholders: - holdername = u'%s-%i' % (holder.name, placeholder) - if holdername == unicode(name, 'utf-8'): + holdername = '%s-%i' % (holder.name, placeholder) + if holdername == ustr(name, 'utf-8'): return holder placeholder += 
1 def add_start_tag (self, node): - if len(self._message) == 0 or not(isinstance(self._message[-1], basestring)): + if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)): self._message.append('') if node.ns() is not None and node.ns().name is not None: - self._message[-1] += (u'<%s:%s' % (unicode(node.ns().name, 'utf-8'), unicode(node.name, 'utf-8'))) + self._message[-1] += ('<%s:%s' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8'))) else: - self._message[-1] += (u'<%s' % unicode(node.name, 'utf-8')) + self._message[-1] += ('<%s' % ustr(node.name, 'utf-8')) for prop in xml_attr_iter(node): name = prop.name if prop.ns() is not None: name = prop.ns().name + ':' + name atval = prop.content - if not isinstance(atval, unicode): - atval = unicode(atval, 'utf-8') + if not isinstance(atval, ustr_type): + atval = ustr(atval, 'utf-8') atval = atval.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;') self._message += " %s=\"%s\"" % (name, atval) if node.children is not None: @@ -244,12 +262,12 @@ class Message (object): def add_end_tag (self, node): if node.children is not None: - if len(self._message) == 0 or not(isinstance(self._message[-1], basestring)): + if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)): self._message.append('') if node.ns() is not None and node.ns().name is not None: - self._message[-1] += (u'</%s:%s>' % (unicode(node.ns().name, 'utf-8'), unicode(node.name, 'utf-8'))) + self._message[-1] += ('</%s:%s>' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8'))) else: - self._message[-1] += (u'</%s>' % unicode(node.name, 'utf-8')) + self._message[-1] += ('</%s>' % ustr(node.name, 'utf-8')) def is_empty (self): return self._empty @@ -261,16 +279,16 @@ class Message (object): self._ctxt = ctxt def add_source (self, source): - if not isinstance(source, unicode): - source = unicode(source, 'utf-8') + if not isinstance(source, ustr_type): + source = ustr(source, 'utf-8') self._sources.append(source) 
def get_sources (self): return self._sources def add_marker (self, marker): - if not isinstance(marker, unicode): - marker = unicode(marker, 'utf-8') + if not isinstance(marker, ustr_type): + marker = ustr(marker, 'utf-8') self._markers.append(marker) def get_markers (self): @@ -290,13 +308,13 @@ class Message (object): return self._comments def get_string (self): - message = u'' + message = '' placeholder = 1 for msg in self._message: - if isinstance(msg, basestring): + if isinstance(msg, string_types): message += msg elif isinstance(msg, Message.Placeholder): - message += u'<_:%s-%i/>' % (msg.name, placeholder) + message += '<_:%s-%i/>' % (msg.name, placeholder) placeholder += 1 if not self._preserve: message = re.sub('\s+', ' ', message).strip() @@ -315,10 +333,10 @@ class Message (object): self._locale_filter = locale def format (self): - ret = u'' + ret = '' markers = {} for marker in self._markers: - if not markers.has_key(marker): + if marker not in markers: ret += '#. (itstool) path: ' + marker + '\n' markers[marker] = marker for idvalue in self._id_values: @@ -329,7 +347,7 @@ class Message (object): commentsdict = {} for comment in self._comments: key = comment.get_text() - if commentsdict.has_key(key): + if key in commentsdict: for marker in comment.get_markers(): commentsdict[key].add_marker(marker) else: @@ -340,23 +358,23 @@ class Message (object): ret += '#.\n' ret += comments[i].format() for source in self._sources: - ret += u'#: %s\n' % source + ret += '#: %s\n' % source if self._preserve: - ret += u'#, no-wrap\n' + ret += '#, no-wrap\n' if self._ctxt is not None: - ret += u'msgctxt "%s"\n' % self._ctxt + ret += 'msgctxt "%s"\n' % self._ctxt message = self.get_string() if self._preserve: - ret += u'msgid ""\n' + ret += 'msgid ""\n' lines = message.split('\n') - for line, no in zip(lines, range(len(lines))): + for line, no in zip(lines, list(range(len(lines)))): if no == len(lines) - 1: - ret += u'"%s"\n' % self.escape(line) + ret += '"%s"\n' % 
self.escape(line) else: - ret += u'"%s\\n"\n' % self.escape(line) + ret += '"%s\\n"\n' % self.escape(line) else: - ret += u'msgid "%s"\n' % self.escape(message) - ret += u'msgstr ""\n' + ret += 'msgid "%s"\n' % self.escape(message) + ret += 'msgstr ""\n' return ret @@ -413,7 +431,7 @@ def fix_node_ns (node, nsdefs): nsdef = node.nsDefs() while nsdef is not None: nextnsdef = nsdef.next - if nsdefs.has_key(nsdef.name) and nsdefs[nsdef.name] == nsdef.content: + if nsdef.name in nsdefs and nsdefs[nsdef.name] == nsdef.content: node.removeNsDef(nsdef.content) else: childnsdefs[nsdef.name] = nsdef.content @@ -508,7 +526,7 @@ class Document (object): try: self._check_errors() except libxml2.parserError as e: - sys.stderr.write('Error: Could not parse document:\n%s\n' % str(e)) + sys.stderr.write('Error: Could not parse document:\n%s\n' % ustr(e)) sys.exit(1) self._msgs = messages self._its_translate_nodes = {} @@ -532,7 +550,7 @@ class Document (object): for child in xml_child_iter(rules): if xml_is_ns_name(child, NS_ITS, 'param'): name = child.nsProp('name', None) - if params.has_key(name): + if name in params: value = params[name] else: value = child.getContent() @@ -586,7 +604,7 @@ class Document (object): oldnode = None xpath.setContextNode(node) idvalue = self._try_xpath_eval(xpath, idv) - if isinstance(idvalue, basestring): + if isinstance(idvalue, string_types): self._its_id_values[node] = idvalue else: for val in idvalue: @@ -606,7 +624,7 @@ class Document (object): oldnode = None xpath.setContextNode(node) ctxt = self._try_xpath_eval(xpath, rule.nsProp('contextPointer', None)) - if isinstance(ctxt, basestring): + if isinstance(ctxt, string_types): self._itst_contexts[node] = ctxt else: for ctxt in ctxt: @@ -642,7 +660,7 @@ class Document (object): oldnode = None xpath.setContextNode(node) note = self._try_xpath_eval(xpath, sel) - if isinstance(note, basestring): + if isinstance(note, string_types): if ref: nodenote = LocNote(locnoteref=note, locnotetype=notetype) 
else: @@ -751,7 +769,7 @@ class Document (object): nsdef = par.nsDefs() while nsdef is not None: if nsdef.name is not None: - if not nss.has_key(nsdef.name): + if nsdef.name not in nss: nss[nsdef.name] = nsdef.content xpath.xpathRegisterNs(nsdef.name, nsdef.content) nsdef = nsdef.next @@ -770,7 +788,7 @@ class Document (object): nsdef = par.nsDefs() while nsdef is not None: if nsdef.name is not None: - if not nss.has_key(nsdef.name): + if nsdef.name not in nss: nss[nsdef.name] = nsdef.content xpath.xpathRegisterNs(nsdef.name, nsdef.content) nsdef = nsdef.next @@ -875,7 +893,7 @@ class Document (object): prevtext = node.prev.content if re.sub('\s+', '', prevtext) == '': prevnode = node.prev - for lang in sorted(translations.keys(), reverse=True): + for lang in sorted(list(translations.keys()), reverse=True): locale = self.get_its_locale_filter(node) lmatch = match_locale_list(locale[0], lang) if (locale[1] == 'include' and not lmatch) or (locale[1] == 'exclude' and lmatch): @@ -966,7 +984,9 @@ class Document (object): for attr in trans_attrs: newcontent = translations.ugettext(attr.get_content()) if newcontent: - newnode.setProp(attr.name, translations.ugettext(attr.get_content())) + if not PY3: + newcontent = newcontent.encode('utf-8') + newnode.setProp(attr.name, newcontent) def get_translated (self, node, translations, strict=False, lang=None): msg = self._msgs.get_message_by_node(node) @@ -993,15 +1013,17 @@ class Document (object): nss['_'] = NS_BLANK try: blurb = node.doc.intSubset().serialize('utf-8') - except: + except Exception: blurb = '' - blurb += '<' + node.name - for nsname in nss.keys(): + blurb += '<' + ustr(node.name, 'utf-8') + for nsname in list(nss.keys()): if nsname is None: blurb += ' xmlns="%s"' % nss[nsname] else: blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname]) - blurb += '>%s</%s>' % (trans.encode('utf-8'), node.name) + blurb += '>%s</%s>' % (trans, ustr(node.name, 'utf-8')) + if not PY3: + blurb = blurb.encode('utf-8') ctxt = 
libxml2.createDocParserCtxt(blurb) if self._load_dtd: ctxt.loadSubset(1) @@ -1010,7 +1032,7 @@ class Document (object): trnode = ctxt.doc().getRootElement() try: self._check_errors() - except libxml2.parserError as e: + except libxml2.parserError: if strict: raise else: @@ -1093,7 +1115,7 @@ class Document (object): if self.get_its_locale_filter(node) != ('*', 'include'): msg.set_locale_filter(self.get_its_locale_filter(node)) msg.add_source('%s:%i' % (self._doc.name, node.lineNo())) - msg.add_marker('%s/%s' % (node.parent.name, node.name)) + msg.add_marker('%s/%s' % (ustr(node.parent.name, 'utf-8'), ustr(node.name, 'utf-8'))) else: withinText = True msg.add_start_tag(node) @@ -1148,7 +1170,7 @@ class Document (object): msg.add_end_tag(node) def generate_external_resource_message(self, node): - if not self._its_externals.has_key(node): + if node not in self._its_externals: return resref = self._its_externals[node] if node.type == 'element': @@ -1162,10 +1184,10 @@ class Document (object): msg = Message() try: fullfile = os.path.join(os.path.dirname(self._filename), resref) - filefp = open(fullfile) + filefp = open(fullfile, 'rb') filemd5 = hashlib.md5(filefp.read()).hexdigest() filefp.close() - except: + except Exception: filemd5 = '__failed__' txt = "external ref='%s' md5='%s'" % (resref, filemd5) msg.set_context('_') @@ -1189,7 +1211,7 @@ class Document (object): while node.type in ('attribute', 'element'): if node.getSpacePreserve() == 1: return True - if self._its_preserve_space_nodes.has_key(node): + if node in self._its_preserve_space_nodes: return (self._its_preserve_space_nodes[node] == 'preserve') node = node.parent return False @@ -1200,7 +1222,7 @@ class Document (object): val = node.nsProp('translate', NS_ITS) elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('translate', None): val = node.nsProp('translate', None) - elif self._its_translate_nodes.has_key(node): + elif node in self._its_translate_nodes: val = self._its_translate_nodes[node] 
if val is not None: return val @@ -1232,7 +1254,7 @@ class Document (object): else: typ = 'include' return (lst, typ) - if (xml_is_ns_name(node, NS_ITS, 'span') and + if (xml_is_ns_name(node, NS_ITS, 'span') and (node.hasNsProp('localeFilterList', None) or node.hasNsProp('localeFilterType', None))): if node.hasNsProp('localeFilterList', None): lst = node.nsProp('localeFilterList', None) @@ -1243,7 +1265,7 @@ class Document (object): else: typ = 'include' return (lst, typ) - if self._its_locale_filters.has_key(node): + if node in self._its_locale_filters: return self._its_locale_filters[node] if node.parent.type == 'element': return self.get_its_locale_filter(node.parent) @@ -1298,7 +1320,7 @@ class Document (object): val = self.get_its_loc_notes(node) if len(val) > 0: if val[0].locnote is not None: - compval = 'locNote="%s"\tlocNoteType="%s"' % (str(val[0]), val[0].locnotetype) + compval = 'locNote="%s"\tlocNoteType="%s"' % (ustr(val[0]), val[0].locnotetype) elif val[0].locnoteref is not None: compval = 'locNoteRef="%s"\tlocNoteType="%s"' % (val[0].locnoteref, val[0].locnotetype) elif category == 'externalResourceRef': @@ -1321,7 +1343,7 @@ class Document (object): out.write('%s\t%s\r\n' % (xml_get_node_path(node), compval)) else: out.write('%s\r\n' % (xml_get_node_path(node))) - for attr in sorted(xml_attr_iter(node), lambda x, y: cmp(str(x), str(y))): + for attr in sorted(xml_attr_iter(node), key=ustr): self.output_test_data(category, out, attr) for child in xml_child_iter(node): if child.type == 'element': @@ -1481,7 +1503,7 @@ if __name__ == '__main__': out = sys.stdout else: try: - out = file(opts.output, 'w') + out = io.open(opts.output, 'wt', encoding='utf-8') except: sys.stderr.write('Error: Cannot write to file %s\n' % opts.output) sys.exit(1) @@ -1495,6 +1517,8 @@ if __name__ == '__main__': except: sys.stderr.write('Error: cannot open mo file %s\n' % opts.merge) sys.exit(1) + if PY3: + translations.ugettext = translations.gettext 
translations.add_fallback(NoneTranslations()) if opts.lang is None: opts.lang = convert_locale(os.path.splitext(os.path.basename(opts.merge))[0]) @@ -1506,7 +1530,7 @@ if __name__ == '__main__': if opts.output == '-': out = sys.stdout else: - out = file(opts.output, 'w') + out = open(opts.output, 'w') else: sys.stderr.write('Error: Non-directory output for multiple files\n') sys.exit(1) @@ -1520,11 +1544,11 @@ if __name__ == '__main__': try: doc.merge_translations(translations, opts.lang, strict=opts.strict) except Exception as e: - sys.stderr.write('Error: Could not merge translations:\n%s\n' % str(e)) + sys.stderr.write('Error: Could not merge translations:\n%s\n' % ustr(e)) sys.exit(1) fout = out - if isinstance(fout, basestring): - fout = file(os.path.join(fout, os.path.basename(filename)), 'w') + if isinstance(fout, string_types): + fout = open(os.path.join(fout, os.path.basename(filename)), 'w') fout.write(doc._doc.serialize('utf-8')) elif opts.join is not None: translations = {} @@ -1535,14 +1559,16 @@ if __name__ == '__main__': sys.stderr.write('Error: cannot open mo file %s\n' % filename) sys.exit(1) thistr.add_fallback(NoneTranslations()) + if PY3: + thistr.ugettext = thistr.gettext lang = convert_locale(os.path.splitext(os.path.basename(filename))[0]) translations[lang] = thistr if opts.output is None: out = sys.stdout elif os.path.isdir(opts.output): - out = file(os.path.join(opts.output, os.path.basename(filename)), 'w') + out = open(os.path.join(opts.output, os.path.basename(filename)), 'w') else: - out = file(opts.output, 'w') + out = open(opts.output, 'w') messages = MessageList() doc = Document(opts.join, messages) doc.apply_its_rules(not(opts.nobuiltins), params=params) @@ -1555,9 +1581,9 @@ if __name__ == '__main__': try: doc.merge_translations(translations, opts.lang, strict=opts.strict) except Exception as e: - sys.stderr.write('Error: Could not merge translations:\n%s\n' % str(e)) + sys.stderr.write('Error: Could not merge translations:\n%s\n' 
% ustr(e)) sys.exit(1) fout = out - if isinstance(fout, basestring): - fout = file(os.path.join(fout, os.path.basename(filename)), 'w') + if isinstance(fout, string_types): + fout = open(os.path.join(fout, os.path.basename(filename)), 'w') fout.write(doc._doc.serialize('utf-8')) From 89aff64b9e7fbfd74e586ef61c0804b646004e80 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Sat, 16 Jan 2016 16:05:54 -0500 Subject: [PATCH 2/2] Fix problem with merging translations under py3 This fixes three problems at once: 1. When fiddling with credits nodes, we would get an error about libxml2 expecting `str` rather than `bytes`. We could fix this by encoding the value only when under py2. 2. When writing the merged XML, our serialized data would be `str` under py3, which would cause implicit encoding problems when writing those contents to the file. 3. `fout` would not be closed after writing, which would sometimes cause the target file to end up with no contents at all (at least on my machine). --- itstool.in | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/itstool.in b/itstool.in index 1a59a5a..45ff0c0 100755 --- a/itstool.in +++ b/itstool.in @@ -839,7 +839,8 @@ class Document (object): elif select == 'year' and len(trdata) == 4: val = trdata[3] if val is not None: - val = val.encode('utf-8') + if not PY3: + val = val.encode('utf-8') parent.addContent(val) else: newnode = node.copyNode(2) @@ -1546,10 +1547,18 @@ if __name__ == '__main__': except Exception as e: sys.stderr.write('Error: Could not merge translations:\n%s\n' % ustr(e)) sys.exit(1) + serialized = doc._doc.serialize('utf-8') + if PY3: + # For some reason, under py3, our serialized data is returned as a str. 
+ # Let's encode it to bytes + serialized = serialized.encode('utf-8') fout = out - if isinstance(fout, string_types): - fout = open(os.path.join(fout, os.path.basename(filename)), 'w') - fout.write(doc._doc.serialize('utf-8')) + fout_is_str = isinstance(fout, string_types) + if fout_is_str: + fout = open(os.path.join(fout, os.path.basename(filename)), 'wb') + fout.write(serialized) + if fout_is_str: + fout.close() elif opts.join is not None: translations = {} for filename in args[1:]: