Bug 891835: extract comments from pot files into lang files

* gettext.py/pot_to_langfiles() updated to extract comments from the pot file into the templates/*.lang files
* gettext.py/_append_to_lang_file() now outputs comments if they exist
* dotlang.py/parse() has a new optional parameter: extract_comments=False
* l10n_merge uses extract_comments=True
* updated test_merge_lang_file()
* added test_parse_with_comments()
* all tests pass
2013-08-19 15:21:21 +02:00 · 2013-08-19 15:21:21 +02:00 · 384982b345
--- a/lib/l10n_utils/dotlang.py
+++ b/lib/l10n_utils/dotlang.py
@ -34,12 +34,13 @@ FORMAT_IDENTIFIER_RE = re.compile(r"""(%
 TAG_REGEX = re.compile(r"^## (\w+) ##")


-def parse(path, skip_untranslated=True):
+def parse(path, skip_untranslated=True, extract_comments=False):
    """
    Parse a dotlang file and return a dict of translations.
    :param path: Absolute path to a lang file.
    :param skip_untranslated: Exclude strings for which the ID and translation
                              match.
+    :param extract_comments: Extract one line comments from template if True
    :return: dict
    """
    trans = {}
@ -49,13 +50,18 @@ def parse(path, skip_untranslated=True):

    with codecs.open(path, 'r', 'utf-8', errors='replace') as lines:
        source = None
+        comment = ''

        for line in lines:
            if u'<EFBFBD>' in line:
                mail_error(path, line)

            line = line.strip()
-            if line == '' or line[0] == '#':
+            if not line:
+                continue
+
+            if line[0] == '#':
+                comment = line.lstrip('#').strip()
                continue

            if line[0] == ';':
@ -67,8 +73,12 @@ def parse(path, skip_untranslated=True):
                line = line.strip()
                if skip_untranslated and source == line:
                    continue
-                trans[source] = line
-
+                if extract_comments:
+                    trans[source] = [comment, line]
+                    comment = ''
+                else:
+                    trans[source] = line
+            
    return trans


--- a/lib/l10n_utils/gettext.py
+++ b/lib/l10n_utils/gettext.py
@ -35,21 +35,25 @@ def parse_po(path):

        msgid = None
        msgpath = None
+        msgcomment = None

        for line in lines:
            line = line.strip()
-            if line.startswith('#'):
+            if line.startswith('#:'):
                matches = REGEX_URL.match(line)
                if matches:
                    msgpath = matches.group(1)
+            elif line.startswith('#.'):
+                msgcomment = line.lstrip('#.').strip()
            elif line.startswith('msgid'):
                msgid = extract_content(line)
            elif line.startswith('msgstr') and msgid and msgpath:
                if msgpath not in msgs:
                    msgs[msgpath] = []
-                msgs[msgpath].append(msgid)
+                msgs[msgpath].append([msgcomment, msgid])
                msgid = None
                msgpath = None
+                msgcomment = None
            elif msgid is not None:
                msgid += parse_string(line)

@ -221,8 +225,10 @@ def pot_to_langfiles():

        with codecs.open(target, 'a', 'utf-8') as out:
            for msg in msgs:
-                if msg not in curr and msg not in main_msgs:
-                    out.write(';%s\n%s\n\n\n' % (msg, msg))
+                if msg[1] not in curr and msg[1] not in main_msgs:
+                    if msg[0] is not None:
+                        out.write(u'# %s\n' % (msg[0]))
+                    out.write(u';%s\n%s\n\n\n' % (msg[1], msg[1]))


 def find_lang_files(lang):
@ -254,14 +260,21 @@ def merge_lang_files(langs):

            dest = lang_file(f, lang)
            src_msgs = parse_lang(lang_file(f, 'templates'),
-                                  skip_untranslated=False)
+                                  skip_untranslated=False,
+                                  extract_comments=True)
            dest_msgs = parse_lang(dest, skip_untranslated=False)
+            new_msgs = [src_msgs[msg] for msg in src_msgs if msg not in dest_msgs]

-            new_msgs = [msg for msg in src_msgs if msg not in dest_msgs]
            _append_to_lang_file(dest, new_msgs)


 def _append_to_lang_file(dest, new_msgs):
    with codecs.open(dest, 'a', 'utf-8') as out:
        for msg in new_msgs:
-            out.write(u'\n\n;{msg}\n{msg}\n'.format(msg=msg))
+            if isinstance(msg, basestring):
+                msg = [None, msg]
+            out_str = u'\n\n'
+            if msg[0]:
+                out_str += u'# {comment}\n'
+            out_str += u';{msg}\n{msg}\n'
+            out.write(out_str.format(msg=msg[1], comment=msg[0]))
--- a/lib/l10n_utils/tests/test_commands.py
+++ b/lib/l10n_utils/tests/test_commands.py
@ -345,8 +345,8 @@ class Testl10nMerge(unittest.TestCase):
        merge_lang_files(['de'])
        dest_file = path.join(ROOT, 'locale', 'de', 'firefox', 'fx.lang')
        write_mock.assert_called_once_with(dest_file,
-                                           [u'Find out if your device is '
-                                            u'supported &nbsp;\xbb'])
+                                           [['', u'Find out if your device is '
+                                           u'supported &nbsp;\xbb']])

    @patch('lib.l10n_utils.gettext.codecs.open')
    def test_append_to_lang_file(self, open_mock):
--- a/lib/l10n_utils/tests/test_dotlang.py
+++ b/lib/l10n_utils/tests/test_dotlang.py
@ -132,6 +132,21 @@ class TestDotlang(TestCase):
        }
        eq_(parsed, expected)

+    def test_parse_with_comments(self):
+        path = os.path.join(ROOT, 'test.lang')
+        parsed = parse(path, extract_comments=True)
+
+        expected = {
+            u'Hooray! Your Firefox is up to date.':
+                [u'This is for the Whatsnew page: '
+                 u'http://www-dev.allizom.org/b/firefox/whatsnew/',
+                 u'F\xe9licitations&nbsp;! '
+                 u'Votre Firefox a \xe9t\xe9 mis \xe0 jour.'],
+            u'Your Firefox is out of date.':
+                ['', u'Votre Firefox ne semble pas \xe0 jour.']
+        }
+        eq_(parsed, expected)
+
    def test_parse_utf8_error(self):
        path = os.path.join(ROOT, 'test_utf8_error.lang')
        parsed = parse(path)