Configurable directories for dependencies and models; .compute file

This commit is contained in:
Tilman Kamp 2020-03-13 19:27:50 +01:00
Родитель b32ce062b8
Коммит a2229e9c8b
5 изменённых файлов: 24 добавлений и 11 удалений

11
.compute Normal file
Просмотреть файл

@ -0,0 +1,11 @@
#!/bin/bash
# Compute-job entry script: installs system build prerequisites, exports the
# directory overrides used by the rest of the project, and runs bin/genlm for
# language code "en" in utf8 alphabet mode.
#
# Requires: ML_GROUP_DIR must be set (non-empty) by the calling environment.

# -x traces every command, -e aborts on the first failing command.
set -xe

# Fail fast when ML_GROUP_DIR is unset or empty; otherwise MODELS_DIR would
# silently expand to "/language-models" and be created at the filesystem root.
: "${ML_GROUP_DIR:?ML_GROUP_DIR must be set}"

# venv support for Python plus cmake, Boost and Eigen development packages.
apt-get install -y python3-venv cmake libboost-all-dev libeigen3-dev

# Exported so that bin/genlm and its child processes see the overrides.
export SW_DIR="/root"
export MODELS_DIR="${ML_GROUP_DIR}/language-models"
mkdir -p "${MODELS_DIR}"

bin/genlm --alphabet-mode utf8 en

Просмотреть файл

@ -2,5 +2,6 @@
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/test/kenlm" vcs="Git" />
</component>
</project>

Просмотреть файл

@ -1,5 +1,7 @@
#!/usr/bin/env bash
SW_DIR="${SW_DIR:-dependencies}"
if [ ! -d venv ]; then
python3 -m venv venv
source venv/bin/activate
@ -8,9 +10,9 @@ if [ ! -d venv ]; then
printf "\n ****** Installed Python packages ****** \n\n\n\n"
fi
if [ ! -f dependencies/kenlm/build/bin/lmplz ]; then
mkdir -p dependencies/kenlm
pushd dependencies
if [ ! -f "${SW_DIR}/kenlm/build/bin/lmplz" ]; then
mkdir -p "${SW_DIR}/kenlm"
pushd "${SW_DIR}"
git clone https://github.com/kpu/kenlm.git
pushd kenlm
@ -27,10 +29,10 @@ if [ ! -f dependencies/kenlm/build/bin/lmplz ]; then
printf "\n ****** Installed KenLM ****** \n\n\n\n"
fi
if [ ! -f dependencies/deepspeech/libdeepspeech.so ]; then
if [ ! -f "${SW_DIR}/deepspeech/libdeepspeech.so" ]; then
source venv/bin/activate
mkdir -p dependencies/deepspeech
python oscarlm/taskcluster.py --target dependencies/deepspeech --branch v0.6.0
mkdir -p "${SW_DIR}/deepspeech"
python oscarlm/taskcluster.py --target "${SW_DIR}/deepspeech" --branch v0.6.0
printf "\n ****** Installed DeepSpeech tools ****** \n\n\n\n"
fi

Просмотреть файл

@ -16,8 +16,9 @@ from utils import maybe_download, maybe_ungzip, maybe_join, section, log_progres
STOP_TOKEN = False
MAX_KEYS = 100000
KENLM_BIN = 'dependencies/kenlm/build/bin'
DEEPSPEECH_BIN = 'dependencies/deepspeech'
SW_DIR = os.getenv('SW_DIR', 'dependencies')
KENLM_BIN = SW_DIR + '/kenlm/build/bin'
DEEPSPEECH_BIN = SW_DIR + '/deepspeech'
def get_partial_path(index):
@ -226,8 +227,6 @@ def parse_args():
help='language of the model to generate')
parser.add_argument('--workers', type=int, default=os.cpu_count(),
help='number of preparation and counting workers')
parser.add_argument('--simulate', action='store_true',
help='simulate language model generation with small amount of input data')
parser.add_argument('--prune-factor', type=int, default=10,
help='times --vocabulary-size of items to keep in each vocabulary aggregator')
parser.add_argument('--vocabulary-size', type=int, default=500000,

Просмотреть файл

@ -13,7 +13,7 @@ def code_from_filename(filename):
FILE_DIR = os.path.dirname(__file__)
LANGUAGE_CODES = list(map(code_from_filename, glob(FILE_DIR + '/[!_]*.py')))
BASE_DIR = os.path.dirname(os.path.dirname(FILE_DIR))
MODELS_DIR = os.path.join(BASE_DIR, 'models')
MODELS_DIR = os.getenv('MODELS_DIR', os.path.join(BASE_DIR, 'models'))
class LanguageBase: