From be8e956775b1cd2d2c03221bca408d6bb4b5c2ce Mon Sep 17 00:00:00 2001 From: "axel%pike.org" Date: Wed, 16 Aug 2006 15:39:22 +0000 Subject: [PATCH] bug 348731, compare-locales unicode support redone --- testing/tests/l10n/lib/Mozilla/CompareLocales.py | 2 +- testing/tests/l10n/lib/Mozilla/Parser.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/testing/tests/l10n/lib/Mozilla/CompareLocales.py b/testing/tests/l10n/lib/Mozilla/CompareLocales.py index 1b17653f756..c760d895756 100755 --- a/testing/tests/l10n/lib/Mozilla/CompareLocales.py +++ b/testing/tests/l10n/lib/Mozilla/CompareLocales.py @@ -180,7 +180,7 @@ def compare(testLocales=[]): try: parser = Parser.getParser(path) except UserWarning: - logging.warning(" Can't compare " + path + " in " + mod + " for " + str(locales)) + logging.warning(" Can't compare " + path + " in " + mod) continue parser.read(Paths.get_path(mod, 'en-US', path)) enMap = parser.mapping() diff --git a/testing/tests/l10n/lib/Mozilla/Parser.py b/testing/tests/l10n/lib/Mozilla/Parser.py index 5d9f0d05bea..cd2de4d00dd 100755 --- a/testing/tests/l10n/lib/Mozilla/Parser.py +++ b/testing/tests/l10n/lib/Mozilla/Parser.py @@ -36,16 +36,23 @@ # ***** END LICENSE BLOCK ***** import re +import codecs +import logging __statics = {} __constructors = {} class Parser: def __init__(self): + if not hasattr(self, 'encoding'): + self.encoding = 'utf-8'; pass def read(self, file): - f = open(file) - self.contents = f.read() + f = codecs.open(file, 'r', self.encoding) + try: + self.contents = f.read() + except UnicodeDecodeError, e: + logging.error(" Can't read file: " + file + '; ' + str(e)) f.close() def mapping(self): m = {} @@ -69,7 +76,7 @@ class Parser: self.offset = cm.end() return self.next() self.offset = m.end() - return (unicode(m.group(1)), self.postProcessValue(m.group(2))) + return (m.group(1), self.postProcessValue(m.group(2))) def getParser(path): ext = path.rsplit('.',1)[1] @@ -90,14 +97,13 @@ class PropertiesParser(Parser): def __init__(self): self.key = re.compile('^\s*([^#!\s\r\n][^=:\r\n]*?)\s*[:=][ \t]*(.*?)[ \t]*$',re.M) self.comment = re.compile('^\s*[#!].*$',re.M) - self._post = re.compile('\\\\u([0-9a-f]+)') + self._post = re.compile('\\\\u([0-9a-fA-F]{4})') Parser.__init__(self) _arg_re = re.compile('%(?:(?P[0-9]+)\$)?(?P[0-9]+)?(?:.(?P[0-9]+))?(?P[hL]|(?:ll?))?(?P[dciouxXefgpCSsn])') def postProcessValue(self, val): m = self._post.search(val) if not m: return val - val = unicode(val) while m: uChar = unichr(int(m.group(1), 16)) val = val.replace(m.group(), uChar)