зеркало из https://github.com/mozilla/scorertool.git
Configurable directories for dependencies and models; .compute file
This commit is contained in:
Родитель
b32ce062b8
Коммит
a2229e9c8b
|
@ -0,0 +1,11 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
apt-get install -y python3-venv cmake libboost-all-dev libeigen3-dev
|
||||
|
||||
export SW_DIR="/root"
|
||||
export MODELS_DIR="${ML_GROUP_DIR}/language-models"
|
||||
mkdir -p "${MODELS_DIR}"
|
||||
|
||||
bin/genlm --alphabet-mode utf8 en
|
|
@ -2,5 +2,6 @@
|
|||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
<mapping directory="$PROJECT_DIR$/test/kenlm" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
14
bin/prepare
14
bin/prepare
|
@ -1,5 +1,7 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
SW_DIR="${SW_DIR:-dependencies}"
|
||||
|
||||
if [ ! -d venv ]; then
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
|
@ -8,9 +10,9 @@ if [ ! -d venv ]; then
|
|||
printf "\n ****** Installed Python packages ****** \n\n\n\n"
|
||||
fi
|
||||
|
||||
if [ ! -f dependencies/kenlm/build/bin/lmplz ]; then
|
||||
mkdir -p dependencies/kenlm
|
||||
pushd dependencies
|
||||
if [ ! -f "${SW_DIR}/kenlm/build/bin/lmplz" ]; then
|
||||
mkdir -p "${SW_DIR}/kenlm"
|
||||
pushd "${SW_DIR}"
|
||||
|
||||
git clone https://github.com/kpu/kenlm.git
|
||||
pushd kenlm
|
||||
|
@ -27,10 +29,10 @@ if [ ! -f dependencies/kenlm/build/bin/lmplz ]; then
|
|||
printf "\n ****** Installed KenLM ****** \n\n\n\n"
|
||||
fi
|
||||
|
||||
if [ ! -f dependencies/deepspeech/libdeepspeech.so ]; then
|
||||
if [ ! -f "${SW_DIR}/deepspeech/libdeepspeech.so" ]; then
|
||||
source venv/bin/activate
|
||||
mkdir -p dependencies/deepspeech
|
||||
python oscarlm/taskcluster.py --target dependencies/deepspeech --branch v0.6.0
|
||||
mkdir -p "${SW_DIR}/deepspeech"
|
||||
python oscarlm/taskcluster.py --target "${SW_DIR}/deepspeech" --branch v0.6.0
|
||||
printf "\n ****** Installed DeepSpeech tools ****** \n\n\n\n"
|
||||
fi
|
||||
|
||||
|
|
|
@ -16,8 +16,9 @@ from utils import maybe_download, maybe_ungzip, maybe_join, section, log_progres
|
|||
STOP_TOKEN = False
|
||||
MAX_KEYS = 100000
|
||||
|
||||
KENLM_BIN = 'dependencies/kenlm/build/bin'
|
||||
DEEPSPEECH_BIN = 'dependencies/deepspeech'
|
||||
SW_DIR = os.getenv('SW_DIR', 'dependencies')
|
||||
KENLM_BIN = SW_DIR + '/kenlm/build/bin'
|
||||
DEEPSPEECH_BIN = SW_DIR + '/deepspeech'
|
||||
|
||||
|
||||
def get_partial_path(index):
|
||||
|
@ -226,8 +227,6 @@ def parse_args():
|
|||
help='language of the model to generate')
|
||||
parser.add_argument('--workers', type=int, default=os.cpu_count(),
|
||||
help='number of preparation and counting workers')
|
||||
parser.add_argument('--simulate', action='store_true',
|
||||
help='simulate language model generation with small amount of input data')
|
||||
parser.add_argument('--prune-factor', type=int, default=10,
|
||||
help='times --vocabulary-size of items to keep in each vocabulary aggregator')
|
||||
parser.add_argument('--vocabulary-size', type=int, default=500000,
|
||||
|
|
|
@ -13,7 +13,7 @@ def code_from_filename(filename):
|
|||
FILE_DIR = os.path.dirname(__file__)
|
||||
LANGUAGE_CODES = list(map(code_from_filename, glob(FILE_DIR + '/[!_]*.py')))
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(FILE_DIR))
|
||||
MODELS_DIR = os.path.join(BASE_DIR, 'models')
|
||||
MODELS_DIR = os.getenv('MODELS_DIR', os.path.join(BASE_DIR, 'models'))
|
||||
|
||||
|
||||
class LanguageBase:
|
||||
|
|
Загрузка…
Ссылка в новой задаче