gecko-dev/xpcom/ds/test/test_dafsa.py

524 строки
8.0 KiB
Python

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from io import StringIO
from incremental_dafsa import Dafsa, Node
import mozunit
import unittest
def _node_to_string(node: Node, prefix, buffer, cache):
if not node.is_end_node:
prefix += str(ord(node.character)) if ord(node.character) < 10 else node.character
else:
prefix += "$"
cached = cache.get(id(node))
buffer.write("{}{}".format(prefix, "=" if cached else "").strip() + "\n")
if not cached:
cache[id(node)] = node
if node:
for node in sorted(node.children.values(), key=lambda n: n.character):
_node_to_string(node, prefix, buffer, cache)
def _dafsa_to_string(dafsa: Dafsa):
"""Encodes the dafsa into a string notation.
Each node is printed on its own line with all the nodes that precede it.
The end node is designated with the "$" character.
If it joins into an existing node, the end of the line is adorned with a "=".
Though this doesn't carry information about which other prefix it has joined with,
it has seemed to be precise enough for testing.
For example, with the input data of:
* a1
* ac1
* bc1
[root] --- a ---- 1 --- [end]
| | /
-- b -- c---
The output will be:
a
a1
a1$ <- end node was found
ac
ac1= <- joins with the "a1" prefix
b
bc= <- joins with the "ac" prefix
"""
buffer = StringIO()
cache = {}
for node in sorted(dafsa.root_node.children.values(), key=lambda n: n.character):
_node_to_string(node, "", buffer, cache)
return buffer.getvalue().strip()
def _to_words(data):
return [line.strip() for line in data.strip().split("\n")]
def _assert_dafsa(data, expected):
words = _to_words(data)
dafsa = Dafsa.from_tld_data(words)
expected = expected.strip()
expected = "\n".join([line.strip() for line in expected.split("\n")])
as_string = _dafsa_to_string(dafsa)
assert as_string == expected
class TestDafsa(unittest.TestCase):
def test_1(self):
_assert_dafsa(
"""
a1
ac1
acc1
bd1
bc1
bcc1
""",
"""
a
a1
a1$
ac
ac1=
acc
acc1=
b
bc=
bd
bd1=
""")
def test_2(self):
_assert_dafsa(
"""
ab1
b1
bb1
bbb1
""",
"""
a
ab
ab1
ab1$
b
b1=
bb
bb1=
bbb=
""")
def test_3(self):
_assert_dafsa(
"""
a.ca1
a.com1
c.corg1
b.ca1
b.com1
b.corg1
""",
"""
a
a.
a.c
a.ca
a.ca1
a.ca1$
a.co
a.com
a.com1=
b
b.
b.c
b.ca=
b.co
b.com=
b.cor
b.corg
b.corg1=
c
c.
c.c
c.co
c.cor=
""")
def test_4(self):
_assert_dafsa(
"""
acom1
bcomcom1
acomcom1
""",
"""
a
ac
aco
acom
acom1
acom1$
acomc
acomco
acomcom
acomcom1=
b
bc
bco
bcom
bcomc=
""")
def test_5(self):
_assert_dafsa(
"""
a.d1
a.c.d1
b.d1
b.c.d1
""",
"""
a
a.
a.c
a.c.
a.c.d
a.c.d1
a.c.d1$
a.d=
b
b.=
""")
def test_6(self):
_assert_dafsa(
"""
a61
a661
b61
b661
""",
"""
a
a6
a61
a61$
a66
a661=
b
b6=
""")
def test_7(self):
_assert_dafsa(
"""
a61
a6661
b61
b6661
""",
"""
a
a6
a61
a61$
a66
a666
a6661=
b
b6=
""")
def test_8(self):
_assert_dafsa(
"""
acc1
bc1
bccc1
""",
"""
a
ac
acc
acc1
acc1$
b
bc
bc1=
bcc=
""")
def test_9(self):
_assert_dafsa(
"""
acc1
bc1
bcc1
""",
"""
a
ac
acc
acc1
acc1$
b
bc
bc1=
bcc=
""")
def test_10(self):
_assert_dafsa(
"""
acc1
cc1
cccc1
""",
"""
a
ac
acc
acc1
acc1$
c
cc
cc1=
ccc=
""")
def test_11(self):
_assert_dafsa(
"""
ac1
acc1
bc1
bcc1
""",
"""
a
ac
ac1
ac1$
acc
acc1=
b
bc=
""")
def test_12(self):
_assert_dafsa(
"""
acd1
bcd1
bcdd1
""",
"""
a
ac
acd
acd1
acd1$
b
bc
bcd
bcd1=
bcdd=
""")
def test_13(self):
_assert_dafsa(
"""
ac1
acc1
bc1
bcc1
bccc1
""",
"""
a
ac
ac1
ac1$
acc
acc1=
b
bc
bc1=
bcc=
""")
def test_14(self):
_assert_dafsa(
"""
acc1
acccc1
bcc1
bcccc1
bcccccc1
""",
"""
a
ac
acc
acc1
acc1$
accc
acccc
acccc1=
b
bc
bcc
bcc1=
bccc=
""")
def test_15(self):
_assert_dafsa(
"""
ac1
bc1
acac1
""",
"""
a
ac
ac1
ac1$
aca
acac
acac1=
b
bc=
""")
def test_16(self):
_assert_dafsa(
"""
bat1
t1
tbat1
""",
"""
b
ba
bat
bat1
bat1$
t
t1=
tb=
""")
def test_17(self):
_assert_dafsa(
"""
acow1
acat1
t1
tcat1
acatcat1
""",
"""
a
ac
aca
acat
acat1
acat1$
acatc
acatca
acatcat
acatcat1=
aco
acow
acow1=
t=
""")
def test_18(self):
_assert_dafsa(
"""
bc1
abc1
abcxyzc1
""",
"""
a
ab
abc
abc1
abc1$
abcx
abcxy
abcxyz
abcxyzc
abcxyzc1=
b
bc=
""")
def test_19(self):
_assert_dafsa(
"""
a.z1
a.y1
c.z1
d.z1
d.y1
""",
"""
a
a.
a.y
a.y1
a.y1$
a.z
a.z1=
c
c.
c.z=
d
d.=
""")
def test_20(self):
_assert_dafsa(
"""
acz1
acy1
accz1
acccz1
bcz1
bcy1
bccz1
bcccz1
""",
"""
a
ac
acc
accc
acccz
acccz1
acccz1$
accz=
acy
acy1=
acz=
b
bc=
""")
if __name__ == '__main__':
mozunit.main()