Bug 1592561, adapt Pontoon parser to compare-locales, some basic tests, r=mathjazz
Copy .lang parser from Pontoon
This is taken from 82190d5bdf/pontoon/sync/formats/lang.py
.
Adapt Pontoon parser to compare-locales, some basic tests.
The parser isn't really intended to be used in a .lang production
environment, so there's not a lot of effort here.
The idea is that we can read good .lang files and get translations
out.
Differential Revision: https://phabricator.services.mozilla.com/D51123
This commit is contained in:
Родитель
a1e377e1e6
Коммит
5be0df0876
|
@ -7,3 +7,6 @@
|
|||
^.tox$
|
||||
^.coverage$
|
||||
^htmlcov$
|
||||
^contrib/lang/build$
|
||||
^contrib/lang/dist$
|
||||
^contrib/lang/src/cl_ext.lang.egg-info$
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
*.orig
|
||||
__pycache__
|
||||
build/
|
||||
dist/
|
||||
src/cl_ext.lang.egg-info/
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2012, Mozilla Foundation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright owner nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,6 @@
|
|||
[bdist_wheel]
|
||||
universal=1
|
||||
|
||||
[options.entry_points]
|
||||
compare_locales.parsers =
|
||||
lang=cl_ext.lang.lang:LangParser
|
|
@ -0,0 +1,18 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from setuptools import setup
|
||||
|
||||
# Packaging metadata for the .lang parser plugin; the package sources live
# under the src/ directory (see package_dir below).
setup(
    name="cl_ext.lang",
    author="Axel Hecht",
    author_email="axel@mozilla.com",
    description=".lang parser for compare-locales",
    platforms=["any"],
    python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4",
    package_dir={"": "src"},
    packages=["cl_ext", "cl_ext.lang"],
    install_requires=[
        "parsimonious",
        "compare_locales",
    ],
)
|
|
@ -0,0 +1 @@
|
|||
# Declare ``cl_ext`` as a pkgutil-style namespace package: extend_path adds
# same-named package directories found elsewhere on sys.path to __path__.
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
|
|
@ -0,0 +1,159 @@
|
|||
"""
|
||||
Parser for the .lang translation format.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import re
|
||||
|
||||
from parsimonious.grammar import Grammar
|
||||
from parsimonious.nodes import NodeVisitor
|
||||
|
||||
from compare_locales.parser.base import Comment, LiteralEntity, Junk, Parser
|
||||
from compare_locales.paths import File
|
||||
|
||||
|
||||
# Sentinel value that LangVisitor.visit_blank_line returns for blank lines.
BLANK_LINE = 'blank_line'
# Matches a literal "{ok}" tag in any letter case, capturing the tag name.
TAG_REGEX = re.compile(r'\{(ok)\}', flags=re.IGNORECASE)
|
||||
|
||||
|
||||
class LangComment(Comment):
    """
    A comment line from a .lang file.

    Stores the three pieces of the line separately (marker, text and line
    ending) so the original line can be put back together.
    """

    def __init__(self, marker, content, end):
        # marker: the leading "#" characters, content: the remainder of the
        # line, end: the line ending text.
        self.marker, self.raw_content, self.end = marker, content, end

    @property
    def content(self):
        """Comment text with leading and trailing whitespace removed."""
        text = self.raw_content
        return text.strip()

    @property
    def raw(self):
        """The comment as written: marker, raw text and line ending."""
        return ''.join((self.marker, self.raw_content, self.end))
|
||||
|
||||
|
||||
class LangEntity(LiteralEntity):
    """A source string / translation pair from a .lang file, plus tags."""

    def __init__(self, source_string, translation_string, all, tags):
        # .lang files have no separate IDs: the source string is the key.
        base_kwargs = {
            'key': source_string,
            'val': translation_string,
            'all': all,
        }
        super(LangEntity, self).__init__(**base_kwargs)

        # De-duplicate the tags found on the translation line.
        self.tags = set(tags)

    @property
    def localized(self):
        """True if the value differs from the key or is tagged ``ok``."""
        if self.key != self.val:
            return True
        return 'ok' in self.tags

    @property
    def extra(self):
        """Extra metadata: the tags, as a list under the ``tags`` key."""
        tag_list = list(self.tags)
        return {'tags': tag_list}
|
||||
|
||||
|
||||
class LangVisitor(NodeVisitor):
    """
    Parsimonious visitor that turns a .lang parse tree into entries.

    Visiting the ``lang_file`` root produces a list whose items are
    ``LangComment`` objects, ``LangEntity`` objects, ``Junk`` objects (for
    entities without a translation) and ``BLANK_LINE`` markers, in file
    order.
    """

    grammar = Grammar(r"""
        lang_file = (comment / entity / blank_line)*

        comment = "#"+ line_content line_ending
        line_content = ~r".*"
        line_ending = ~r"$\n?"m # Match at EOL and EOF without newline.

        blank_line = ~r"((?!\n)\s)*" line_ending

        entity = string translation
        string = ";" line_content line_ending
        translation = line_content line_ending
    """)

    def __init__(self, ctx):
        """Remember *ctx*, the parsing context handed to any ``Junk``."""
        # Two-argument super() works on both Python 2 and Python 3, and
        # matches the form LangEntity uses.
        super(LangVisitor, self).__init__()
        self.ctx = ctx

    def visit_lang_file(self, node, children):
        """
        Find comments that are associated with an entity and add them
        to the entity's comments list. Also assign order to entities.

        Returns the list of visited children unchanged in length and order.
        """
        comments = []
        order = 0
        for child in children:
            if isinstance(child, LangComment):
                comments.append(child)
                continue

            if isinstance(child, LangEntity):
                child.comments = [c.content for c in comments]
                child.order = order
                order += 1

            # Anything that is not a comment (an entity, junk or a blank
            # line) ends the current run of comments.
            comments = []

        return children

    def visit_comment(self, node, node_info):
        """Build a ``LangComment`` from the marker, text and line ending."""
        marker, content, end = node_info
        # The marker may be a list of nodes when several "#" were matched;
        # node_text copes with both shapes.
        return LangComment(
            node_text(marker), node_text(content), node_text(end)
        )

    def visit_blank_line(self, node, _):
        """Represent every blank line by the ``BLANK_LINE`` marker."""
        return BLANK_LINE

    def visit_entity(self, node, node_info):
        """
        Build a ``LangEntity`` from a source string and its translation.

        Tags such as ``{ok}`` are collected (lower-cased) and the
        translation is cut off at the first tag. If nothing is left of the
        translation, a ``Junk`` covering the entity's text is returned
        instead.
        """
        string, translation = node_info

        # Strip tags out of translation if they exist.
        tag_matches = list(TAG_REGEX.finditer(translation))
        tags = [m.group(1).lower() for m in tag_matches]
        if tag_matches:
            translation = translation[:tag_matches[0].start()].strip()

        if translation == '':
            # Record where the untranslated entity sits in the input rather
            # than a fixed (0, 0) span.
            return Junk(self.ctx, (node.start, node.end))

        return LangEntity(string, translation, node.text, tags)

    def visit_string(self, node, node_info):
        """Return the source string without the ";" marker or padding."""
        _marker, content, _end = node_info
        return content.text.strip()

    def visit_translation(self, node, node_info):
        """Return the translation line's text, stripped of whitespace."""
        content, _end = node_info
        return content.text.strip()

    def generic_visit(self, node, children):
        """
        Default for rules without a dedicated visitor method.

        A single visited child is unwrapped; several children are returned
        as a list; a node without children is returned as-is.
        """
        if children and len(children) == 1:
            return children[0]
        return children or node
|
||||
|
||||
|
||||
def node_text(node):
    """
    Return the text covered by a Parsimonious node.

    ``None`` gives an empty string; a list of nodes (as produced by a
    repetition) gives the concatenation of their texts; any other node
    gives its own ``text``.
    """
    if node is None:
        return u''
    if isinstance(node, list):
        return ''.join(item.text for item in node)
    return node.text
|
||||
|
||||
|
||||
class LangParser(Parser):
    """compare-locales parser for ``.lang`` files."""

    def use(self, path):
        """Tell whether *path* (a string or a ``File``) ends in ``.lang``."""
        filename = path.fullpath if isinstance(path, File) else path
        return filename.endswith('.lang')

    def walk(self, only_localizable=False):
        """
        Yield the items parsed from the loaded contents, in file order.

        With ``only_localizable`` set, only ``LangEntity`` and ``Junk``
        items are yielded; otherwise everything the visitor returns is.
        Yields nothing when there is no parsing context.
        """
        ctx = self.ctx
        if not ctx:
            # loading file failed, or we just didn't load anything
            return
        localizable = (LangEntity, Junk)
        for item in LangVisitor(ctx).parse(ctx.contents):
            if only_localizable and not isinstance(item, localizable):
                continue
            yield item
|
|
@ -0,0 +1,44 @@
|
|||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import unittest
|
||||
from compare_locales import parser
|
||||
from parsimonious.exceptions import ParseError
|
||||
|
||||
|
||||
class TestLangParser(unittest.TestCase):
    """Basic tests for the .lang parser registered with compare-locales."""

    def test_good(self):
        """A well-formed file gives one parsed message per source string."""
        lang_parser = parser.getParser('foo.lang')
        lang_parser.readUnicode('''\
# Sample comment
;Source String
Translated String

# First comment
# Second comment
;Multiple Comments
Translated Multiple Comments

;No Comments or Sources
Translated No Comments or Sources
''')
        entries = lang_parser.parse()
        self.assertEqual(len(entries), 3)

    def test_empty_translation(self):
        """A source string followed by an empty line is reported as Junk."""
        lang_parser = parser.getParser('foo.lang')
        lang_parser.readUnicode('''\
# Sample comment
;Source String

''')
        entries = lang_parser.parse()
        self.assertEqual(len(entries), 1)
        self.assertIsInstance(entries[0], parser.Junk)

    def test_bad(self):
        """Content the grammar cannot match raises a ParseError."""
        lang_parser = parser.getParser('foo.lang')
        lang_parser.readUnicode('''\
just garbage
''')
        self.assertRaises(ParseError, lang_parser.parse)
|
13
tox.ini
13
tox.ini
|
@ -1,5 +1,5 @@
|
|||
[tox]
|
||||
envlist = py27, py35, py36, py37, flake8, integration
|
||||
envlist = py27, py35, py36, py37, flake8, lang, integration
|
||||
skipsdist=True
|
||||
|
||||
[travis]
|
||||
|
@ -8,10 +8,19 @@ python =
|
|||
|
||||
[testenv]
|
||||
commands=python -B setup.py test
|
||||
|
||||
[testenv:flake8]
|
||||
deps=flake8 >=3.7, <3.8
|
||||
basepython=python3.7
|
||||
commands=flake8 compare_locales
|
||||
commands=
|
||||
flake8 compare_locales contrib/lang
|
||||
|
||||
[testenv:integration]
|
||||
deps=six
|
||||
commands=python -m unittest discover -s compare_locales/integration_tests
|
||||
|
||||
[testenv:lang]
|
||||
basepython=python3.7
|
||||
deps=
|
||||
--editable=contrib/lang
|
||||
commands=python -m unittest discover contrib/lang/tests
|
||||
|
|
Загрузка…
Ссылка в новой задаче