зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1083971 - Add an option to output a binary file for the PSL data r=leplatrem,erahm
Differential Revision: https://phabricator.services.mozilla.com/D34364 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Родитель
e8c093c291
Коммит
3ac5afd32b
|
@ -8,7 +8,7 @@ import imp
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
from make_dafsa import words_to_cxx
|
||||
from make_dafsa import words_to_cxx, words_to_bin
|
||||
|
||||
"""
|
||||
Processes a file containing effective TLD data. See the following URL for a
|
||||
|
@ -98,11 +98,12 @@ class EffectiveTLDEntry:
|
|||
# DO EVERYTHING #
|
||||
#################
|
||||
|
||||
def main(output, effective_tld_filename):
|
||||
def main(output, effective_tld_filename, output_format="cxx"):
|
||||
"""
|
||||
effective_tld_filename is the effective TLD file to parse.
|
||||
A C++ array of a binary representation of a DAFSA representing the
|
||||
eTLD file is then printed to output.
|
||||
Based on the output format, either a C++ array of a binary representation
|
||||
of a DAFSA representing the eTLD file is then printed to standard output
|
||||
or a binary file is written to disk.
|
||||
"""
|
||||
|
||||
def typeEnum(etld):
|
||||
|
@ -123,7 +124,26 @@ def main(output, effective_tld_filename):
|
|||
for etld in getEffectiveTLDs(effective_tld_filename):
|
||||
yield "%s%d" % (etld.domain(), typeEnum(etld))
|
||||
|
||||
output.write(words_to_cxx(dafsa_words()))
|
||||
""" words_to_bin() returns bytes while words_to_cxx() returns a string """
|
||||
if output_format == "bin":
|
||||
if sys.version_info[0] >= 3:
|
||||
output = output.buffer
|
||||
output.write(words_to_bin(dafsa_words()))
|
||||
else:
|
||||
output.write(words_to_cxx(dafsa_words()))
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # This program can output the DAFSA in two formats:
    # as C++ code that will be included and compiled at build time
    # or as a binary file that will be published in Remote Settings.
    #
    # Format selection on the command line:
    #   (no flag) -> "cxx": C++ array [default]
    #   --bin     -> "bin": Binary file
    output_format = "bin" if "--bin" in sys.argv else "cxx"

    # The input file is the first argument that is not the format flag, so
    # "--bin" may appear before or after the file name.
    positional = [arg for arg in sys.argv[1:] if arg != "--bin"]

    # main() is invoked exactly once; a second call would emit the data twice.
    main(sys.stdout, positional[0], output_format=output_format)
|
||||
|
|
|
@ -193,6 +193,7 @@ The bytes in the generated array has the following meaning:
|
|||
"""
|
||||
|
||||
import sys
|
||||
import struct
|
||||
|
||||
|
||||
class InputError(Exception):
|
||||
|
@ -382,7 +383,7 @@ def encode_prefix(label):
|
|||
"""Encodes a node label as a list of bytes without a trailing high byte.
|
||||
|
||||
This method encodes a node if there is exactly one child and the
|
||||
child follows immidiately after so that no jump is needed. This label
|
||||
child follows immediately after so that no jump is needed. This label
|
||||
will then be a prefix to the label in the child node.
|
||||
"""
|
||||
assert label
|
||||
|
@ -416,6 +417,13 @@ def encode(dafsa):
|
|||
output.reverse()
|
||||
return output
|
||||
|
||||
def encode_words(words):
    """Builds the reduced dafsa for a word list.

    The graph produced by to_dafsa() is passed through reverse() followed by
    join_suffixes() twice, and finally through join_labels(). The resulting
    dafsa is returned; it is not yet serialized (see encode()).
    """
    graph = to_dafsa(words)
    # First reverse + join_suffixes round.
    graph = join_suffixes(reverse(graph))
    # Second round; the second reverse() call is applied to the output of the
    # first round, mirroring the original fixed sequence of passes.
    graph = join_suffixes(reverse(graph))
    return join_labels(graph)
|
||||
|
||||
|
||||
def to_cxx(data, preamble=None):
|
||||
"""Generates C++ code from a list of encoded bytes."""
|
||||
|
@ -439,12 +447,17 @@ def to_cxx(data, preamble=None):
|
|||
|
||||
def words_to_cxx(words, preamble=None):
    """Generates C++ code from a word list.

    The words are turned into a dafsa by encode_words(), serialized with
    encode(), and rendered by to_cxx(). `preamble` is forwarded to to_cxx()
    unchanged.
    """
    # Build the dafsa exactly once through the shared helper: `words` may be a
    # one-shot iterator (e.g. a generator), so it must not be consumed twice.
    dafsa = encode_words(words)
    return to_cxx(encode(dafsa), preamble)
|
||||
|
||||
|
||||
def words_to_bin(words):
    """Generates bytes from a word list.

    The words are converted with encode_words() and serialized with encode();
    every element of the encoded sequence is then packed as one unsigned byte
    (struct format 'B') into a single binary string, which is returned.
    """
    encoded = encode(encode_words(words))
    # One 'B' field per encoded element, e.g. '3B' for three elements.
    layout = '%dB' % len(encoded)
    return struct.pack(layout, *encoded)
|
||||
|
||||
|
||||
def parse_gperf(infile):
|
||||
"""Parses gperf file and extract strings and return code"""
|
||||
lines = [line.strip() for line in infile]
|
||||
|
|
Загрузка…
Ссылка в новой задаче