зеркало из https://github.com/mozilla/DeepSpeech.git
Родитель
1e2eb96248
Коммит
3b54f54524
|
@ -7,7 +7,7 @@ extension-pkg-whitelist=
|
|||
|
||||
# Add files or directories to the blacklist. They should be base names, not
|
||||
# paths.
|
||||
ignore=examples
|
||||
ignore=native_client/kenlm
|
||||
|
||||
# Add files or directories matching the regex patterns to the blacklist. The
|
||||
# regex matches against base names, not paths.
|
||||
|
|
|
@ -5,7 +5,8 @@ from __future__ import absolute_import, division, print_function
|
|||
# This script needs to be run from the root of the DeepSpeech repository
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(1, os.path.join(sys.path[0], '..', '..'))
|
||||
|
||||
sys.path.insert(1, os.path.join(sys.path[0], "..", ".."))
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
|
@ -14,13 +15,21 @@ from util.text import Alphabet, UTF8Alphabet
|
|||
from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet
|
||||
|
||||
|
||||
def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8, default_alpha, default_beta):
|
||||
def create_bundle(
|
||||
alphabet_path,
|
||||
lm_path,
|
||||
vocab_path,
|
||||
package_path,
|
||||
force_utf8,
|
||||
default_alpha,
|
||||
default_beta,
|
||||
):
|
||||
words = set()
|
||||
vocab_looks_char_based = True
|
||||
with open(vocab_path) as fin:
|
||||
for line in fin:
|
||||
for word in line.split():
|
||||
words.add(word.encode('utf-8'))
|
||||
words.add(word.encode("utf-8"))
|
||||
if len(word) > 1:
|
||||
vocab_looks_char_based = False
|
||||
print("{} unique words read from vocabulary file.".format(len(words)))
|
||||
|
@ -30,7 +39,7 @@ def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8,
|
|||
)
|
||||
)
|
||||
|
||||
if force_utf8 != None:
|
||||
if force_utf8 != None: # pylint: disable=singleton-comparison
|
||||
use_utf8 = force_utf8.value
|
||||
else:
|
||||
use_utf8 = vocab_looks_char_based
|
||||
|
@ -53,26 +62,29 @@ def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8,
|
|||
scorer.load_lm(lm_path)
|
||||
scorer.fill_dictionary(list(words))
|
||||
shutil.copy(lm_path, package_path)
|
||||
scorer.save_dictionary(package_path, True) # append, not overwrite
|
||||
print('Package created in {}'.format(package_path))
|
||||
scorer.save_dictionary(package_path, True) # append, not overwrite
|
||||
print("Package created in {}".format(package_path))
|
||||
|
||||
|
||||
class Tristate(object):
|
||||
def __init__(self, value=None):
|
||||
if any(value is v for v in (True, False, None)):
|
||||
self.value = value
|
||||
else:
|
||||
raise ValueError("Tristate value must be True, False, or None")
|
||||
if any(value is v for v in (True, False, None)):
|
||||
self.value = value
|
||||
else:
|
||||
raise ValueError("Tristate value must be True, False, or None")
|
||||
|
||||
def __eq__(self, other):
|
||||
return (self.value is other.value if isinstance(other, Tristate)
|
||||
else self.value is other)
|
||||
return (
|
||||
self.value is other.value
|
||||
if isinstance(other, Tristate)
|
||||
else self.value is other
|
||||
)
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
return not self == other
|
||||
|
||||
def __bool__(self):
|
||||
raise TypeError("Tristate object may not be used as a Boolean")
|
||||
raise TypeError("Tristate object may not be used as a Boolean")
|
||||
|
||||
def __str__(self):
|
||||
return str(self.value)
|
||||
|
@ -100,8 +112,18 @@ def main():
|
|||
help="Path of vocabulary file. Must contain words separated by whitespace.",
|
||||
)
|
||||
parser.add_argument("--package", required=True, help="Path to save scorer package.")
|
||||
parser.add_argument("--default_alpha", type=float, required=True, help="Default value of alpha hyperparameter.")
|
||||
parser.add_argument("--default_beta", type=float, required=True, help="Default value of beta hyperparameter.")
|
||||
parser.add_argument(
|
||||
"--default_alpha",
|
||||
type=float,
|
||||
required=True,
|
||||
help="Default value of alpha hyperparameter.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--default_beta",
|
||||
type=float,
|
||||
required=True,
|
||||
help="Default value of beta hyperparameter.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--force_utf8",
|
||||
default="",
|
||||
|
@ -116,7 +138,15 @@ def main():
|
|||
else:
|
||||
force_utf8 = Tristate(None)
|
||||
|
||||
create_bundle(args.alphabet, args.lm, args.vocab, args.package, force_utf8, args.default_alpha, args.default_beta)
|
||||
create_bundle(
|
||||
args.alphabet,
|
||||
args.lm,
|
||||
args.vocab,
|
||||
args.package,
|
||||
force_utf8,
|
||||
args.default_alpha,
|
||||
args.default_beta,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Загрузка…
Ссылка в новой задаче