Bug 1592561, adapt Pontoon parser to compare-locales, some basic tests, r=mathjazz

Copy .lang parser from Pontoon

This is taken from 82190d5bdf/pontoon/sync/formats/lang.py.

Adapt Pontoon parser to compare-locales, some basic tests.

The parser isn't really intended to be used in a .lang production
environment, so there's not a lot of effort here.
The idea is that we can read good .lang files and get translations
out.

Differential Revision: https://phabricator.services.mozilla.com/D51123
This commit is contained in:
Axel Hecht 2019-11-06 18:13:59 +01:00
Родитель a1e377e1e6
Коммит 5be0df0876
11 изменённых файлов: 272 добавлений и 2 удалений

Просмотреть файл

@ -7,3 +7,6 @@
^.tox$
^.coverage$
^htmlcov$
^contrib/lang/build$
^contrib/lang/dist$
^contrib/lang/src/cl_ext.lang.egg-info$

5
contrib/lang/.gitignore поставляемый Normal file
Просмотреть файл

@ -0,0 +1,5 @@
*.orig
__pycache__
build/
dist/
src/cl_ext.lang.egg-info/

25
contrib/lang/LICENSE Normal file
Просмотреть файл

@ -0,0 +1,25 @@
Copyright (c) 2012, Mozilla Foundation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright owner nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

6
contrib/lang/setup.cfg Normal file
Просмотреть файл

@ -0,0 +1,6 @@
[bdist_wheel]
universal=1
[options.entry_points]
compare_locales.parsers =
lang=cl_ext.lang.lang:LangParser

18
contrib/lang/setup.py Normal file
Просмотреть файл

@ -0,0 +1,18 @@
from __future__ import absolute_import
from setuptools import setup
# Packaging metadata for the .lang parser plugin for compare-locales.
setup(
    name='cl_ext.lang',
    description='.lang parser for compare-locales',
    author='Axel Hecht',
    author_email='axel@mozilla.com',
    platforms=['any'],
    # Python 2.7, or Python 3.5 and later.
    python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4',
    # Sources live below src/ rather than next to this file.
    package_dir={'': 'src'},
    packages=[
        'cl_ext',
        'cl_ext.lang',
    ],
    install_requires=[
        'parsimonious',
        'compare_locales',
    ],
)

Просмотреть файл

@ -0,0 +1 @@
# Extend this package's search path with pkgutil, so that ``cl_ext`` works as
# a namespace package whose subpackages may live in several directories on
# sys.path.
__path__ = __import__('pkgutil').extend_path(__path__, __name__)

Просмотреть файл

Просмотреть файл

@ -0,0 +1,159 @@
"""
Parser for the .lang translation format.
"""
from __future__ import absolute_import
import re
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor
from compare_locales.parser.base import Comment, LiteralEntity, Junk, Parser
from compare_locales.paths import File
# Marker value that LangVisitor.visit_blank_line returns for blank lines.
BLANK_LINE = 'blank_line'
# Matches an ``{ok}`` tag, ignoring case; group 1 captures the tag name.
TAG_REGEX = re.compile(r'\{(ok)\}', re.I)
class LangComment(Comment):
    """A comment line of a .lang file, stored as its three text pieces.

    The pieces are the comment marker, the text following it, and the
    line ending. The initializer of the base class is not invoked; only
    these three attributes are set.
    """

    def __init__(self, marker, content, end):
        self.marker, self.raw_content, self.end = marker, content, end

    @property
    def content(self):
        """The comment text without leading and trailing whitespace."""
        stripped = self.raw_content.strip()
        return stripped

    @property
    def raw(self):
        """The comment as written: marker, text and line ending."""
        head = self.marker + self.raw_content
        return head + self.end
class LangEntity(LiteralEntity):
    """A source string / translation pair read from a .lang file.

    :param source_string: the source text; it doubles as the entity key.
    :param translation_string: the translated text, used as the value.
    :param all: the complete text of the entity.
    :param tags: iterable of tag names found on the translation.
    """

    def __init__(self, source_string, translation_string, all, tags):
        # .lang files use the source as the key.
        super(LangEntity, self).__init__(
            key=source_string, val=translation_string, all=all
        )
        self.tags = set(tags)

    @property
    def localized(self):
        """Whether the entity counts as translated.

        True when the entity carries the ``ok`` tag, or when the
        translation differs from the source string.
        """
        if 'ok' in self.tags:
            return True
        return self.key != self.val

    @property
    def extra(self):
        """Additional data for this entity: its tags, as a list."""
        tag_list = list(self.tags)
        return {'tags': tag_list}
class LangVisitor(NodeVisitor):
    """Convert the parse tree of a .lang file into parser entries.

    Parsing yields a list holding, in file order, a ``LangComment`` for
    each comment line, ``BLANK_LINE`` for each blank line, and a
    ``LangEntity`` (or ``Junk`` if the translation is empty) for each
    source/translation pair.

    :param ctx: the parser context; it is handed to any ``Junk`` created.
    """

    grammar = Grammar(r"""
        lang_file = (comment / entity / blank_line)*
        comment = "#"+ line_content line_ending
        line_content = ~r".*"
        line_ending = ~r"$\n?"m # Match at EOL and EOF without newline.
        blank_line = ~r"((?!\n)\s)*" line_ending
        entity = string translation
        string = ";" line_content line_ending
        translation = line_content line_ending
    """)

    def __init__(self, ctx):
        # Two-argument super() keeps this importable on Python 2.7, which
        # the package metadata declares as supported.
        super(LangVisitor, self).__init__()
        self.ctx = ctx

    def visit_lang_file(self, node, children):
        """
        Find comments that are associated with an entity and add them
        to the entity's comments list. Also assign order to entities.

        Returns the list of children, with entities updated in place.
        """
        comments = []
        order = 0
        for child in children:
            if isinstance(child, LangComment):
                comments.append(child)
                continue

            if isinstance(child, LangEntity):
                child.comments = [c.content for c in comments]
                child.order = order
                order += 1

            # Anything that is not a comment ends the current run of
            # comments, so they only attach to a directly following entity.
            comments = []

        return children

    def visit_comment(self, node, node_info):
        """Build a LangComment from the marker, content and line ending."""
        marker, content, end = node_info
        return LangComment(
            node_text(marker), node_text(content), node_text(end)
        )

    def visit_blank_line(self, node, _):
        """Represent a blank line by the BLANK_LINE marker."""
        return BLANK_LINE

    def visit_entity(self, node, node_info):
        """Build a LangEntity, or Junk if no translation text remains."""
        string, translation = node_info

        # Strip tags out of translation if they exist.
        tags = []
        tag_matches = list(TAG_REGEX.finditer(translation))
        if tag_matches:
            tags = [m.group(1).lower() for m in tag_matches]
            translation = translation[:tag_matches[0].start()].strip()

        if translation == '':
            # Use the entity's own offsets so the Junk covers the
            # offending text instead of an empty span at the file start.
            return Junk(self.ctx, (node.start, node.end))

        return LangEntity(string, translation, node.text, tags)

    def visit_string(self, node, node_info):
        """Return the stripped source text that follows the ';' marker."""
        marker, content, end = node_info
        return content.text.strip()

    def visit_translation(self, node, node_info):
        """Return the stripped text of the translation line."""
        content, end = node_info
        return content.text.strip()

    def generic_visit(self, node, children):
        """Fallback for nodes without a dedicated visit method.

        A node with exactly one child is replaced by that child.
        Otherwise the list of children is returned, or the node itself
        when it has no children.
        """
        if children and len(children) == 1:
            return children[0]
        return children or node
def node_text(node):
    """
    Return the text of a Parsimonious node.

    A repetition can yield a list of nodes instead of a single node; in
    that case the texts of all of them are concatenated. ``None`` gives
    an empty string.
    """
    if node is None:
        return u''
    if isinstance(node, list):
        return ''.join(part.text for part in node)
    return node.text
class LangParser(Parser):
    """compare-locales parser for .lang files, backed by LangVisitor."""

    def use(self, path):
        """Tell whether this parser handles ``path``.

        ``path`` may be a string or a ``File``; for a ``File`` its
        ``fullpath`` is checked. Handled paths end in ``.lang``.
        """
        fullpath = path.fullpath if isinstance(path, File) else path
        return fullpath.endswith('.lang')

    def walk(self, only_localizable=False):
        """Yield the entries of the loaded file, in file order.

        With ``only_localizable`` set, only ``LangEntity`` and ``Junk``
        entries are yielded. Nothing is yielded if there is no context.
        """
        ctx = self.ctx
        if not ctx:
            # loading file failed, or we just didn't load anything
            return
        entries = LangVisitor(ctx).parse(ctx.contents)
        for entry in entries:
            if only_localizable and not isinstance(entry, (LangEntity, Junk)):
                continue
            yield entry

Просмотреть файл

Просмотреть файл

@ -0,0 +1,44 @@
from __future__ import absolute_import, unicode_literals
import unittest
from compare_locales import parser
from parsimonious.exceptions import ParseError
class TestLangParser(unittest.TestCase):
    """Basic checks of the .lang parser obtained through compare-locales."""

    def test_good(self):
        """Three source/translation pairs parse to three entries."""
        lang_parser = parser.getParser('foo.lang')
        source = '''\
# Sample comment
;Source String
Translated String
# First comment
# Second comment
;Multiple Comments
Translated Multiple Comments
;No Comments or Sources
Translated No Comments or Sources
'''
        lang_parser.readUnicode(source)
        entries = lang_parser.parse()
        self.assertEqual(len(entries), 3)

    def test_empty_translation(self):
        """A source string without translation text gives one Junk."""
        lang_parser = parser.getParser('foo.lang')
        source = '''\
# Sample comment
;Source String
'''
        lang_parser.readUnicode(source)
        entries = lang_parser.parse()
        self.assertEqual(len(entries), 1)
        self.assertIsInstance(entries[0], parser.Junk)

    def test_bad(self):
        """Content that is not valid .lang makes parse() raise ParseError."""
        lang_parser = parser.getParser('foo.lang')
        source = '''\
just garbage
'''
        lang_parser.readUnicode(source)
        self.assertRaises(ParseError, lang_parser.parse)

13
tox.ini
Просмотреть файл

@ -1,5 +1,5 @@
[tox]
envlist = py27, py35, py36, py37, flake8, integration
envlist = py27, py35, py36, py37, flake8, lang, integration
skipsdist=True
[travis]
@ -8,10 +8,19 @@ python =
[testenv]
commands=python -B setup.py test
[testenv:flake8]
deps=flake8 >=3.7, <3.8
basepython=python3.7
commands=flake8 compare_locales
commands=
flake8 compare_locales contrib/lang
[testenv:integration]
deps=six
commands=python -m unittest discover -s compare_locales/integration_tests
[testenv:lang]
basepython=python3.7
deps=
--editable=contrib/lang
commands=python -m unittest discover contrib/lang/tests