X-DeepSpeech: NOBUILD
This commit is contained in:
Reuben Morais 2020-01-21 12:49:51 +01:00
Родитель 1e2eb96248
Коммит 3b54f54524
2 изменённых файлов: 48 добавлений и 18 удалений

Просмотреть файл

@ -7,7 +7,7 @@ extension-pkg-whitelist=
# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=examples
ignore=native_client/kenlm
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.

Просмотреть файл

@ -5,7 +5,8 @@ from __future__ import absolute_import, division, print_function
# This script needs to be run from the root of the DeepSpeech repository
import os
import sys
sys.path.insert(1, os.path.join(sys.path[0], '..', '..'))
sys.path.insert(1, os.path.join(sys.path[0], "..", ".."))
import argparse
import shutil
@ -14,13 +15,21 @@ from util.text import Alphabet, UTF8Alphabet
from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet
def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8, default_alpha, default_beta):
def create_bundle(
alphabet_path,
lm_path,
vocab_path,
package_path,
force_utf8,
default_alpha,
default_beta,
):
words = set()
vocab_looks_char_based = True
with open(vocab_path) as fin:
for line in fin:
for word in line.split():
words.add(word.encode('utf-8'))
words.add(word.encode("utf-8"))
if len(word) > 1:
vocab_looks_char_based = False
print("{} unique words read from vocabulary file.".format(len(words)))
@ -30,7 +39,7 @@ def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8,
)
)
if force_utf8 != None:
if force_utf8 != None: # pylint: disable=singleton-comparison
use_utf8 = force_utf8.value
else:
use_utf8 = vocab_looks_char_based
@ -53,26 +62,29 @@ def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8,
scorer.load_lm(lm_path)
scorer.fill_dictionary(list(words))
shutil.copy(lm_path, package_path)
scorer.save_dictionary(package_path, True) # append, not overwrite
print('Package created in {}'.format(package_path))
scorer.save_dictionary(package_path, True) # append, not overwrite
print("Package created in {}".format(package_path))
class Tristate(object):
    """Three-state flag whose value is exactly ``True``, ``False`` or ``None``.

    The wrapper deliberately refuses implicit truth testing, so code cannot
    conflate "unset" (``None``) with ``False`` by writing ``if flag:``.
    Callers must compare explicitly (``flag == None``, ``flag != None``,
    ``flag == True``) or read ``flag.value``.
    """

    def __init__(self, value=None):
        """Store ``value``.

        Raises:
            ValueError: if ``value`` is not one of the singletons ``True``,
                ``False`` or ``None``.
        """
        # Identity check, not equality: look-alikes such as 0, 1 or "" must be
        # rejected even though they compare equal to / behave like booleans.
        if any(value is v for v in (True, False, None)):
            self.value = value
        else:
            raise ValueError("Tristate value must be True, False, or None")

    def __eq__(self, other):
        """Compare by identity of the wrapped value.

        ``other`` may be another ``Tristate`` (its ``value`` is used) or a raw
        ``True``/``False``/``None``.
        """
        return (
            self.value is other.value
            if isinstance(other, Tristate)
            else self.value is other
        )

    def __ne__(self, other):
        """Return the negation of ``__eq__`` (needed explicitly on Python 2)."""
        return not self == other

    def __bool__(self):
        """Forbid truth testing; always raises ``TypeError``."""
        raise TypeError("Tristate object may not be used as a Boolean")

    # Python 2 consults __nonzero__ rather than __bool__ for truth testing.
    # Without this alias the guard above is silently bypassed there and every
    # Tristate instance evaluates as true.
    __nonzero__ = __bool__

    def __str__(self):
        """Return the string form of the wrapped value."""
        return str(self.value)
@ -100,8 +112,18 @@ def main():
help="Path of vocabulary file. Must contain words separated by whitespace.",
)
parser.add_argument("--package", required=True, help="Path to save scorer package.")
parser.add_argument("--default_alpha", type=float, required=True, help="Default value of alpha hyperparameter.")
parser.add_argument("--default_beta", type=float, required=True, help="Default value of beta hyperparameter.")
parser.add_argument(
"--default_alpha",
type=float,
required=True,
help="Default value of alpha hyperparameter.",
)
parser.add_argument(
"--default_beta",
type=float,
required=True,
help="Default value of beta hyperparameter.",
)
parser.add_argument(
"--force_utf8",
default="",
@ -116,7 +138,15 @@ def main():
else:
force_utf8 = Tristate(None)
create_bundle(args.alphabet, args.lm, args.vocab, args.package, force_utf8, args.default_alpha, args.default_beta)
create_bundle(
args.alphabet,
args.lm,
args.vocab,
args.package,
force_utf8,
args.default_alpha,
args.default_beta,
)
if __name__ == "__main__":