Mirror of https://github.com/mozilla/kaldi.git

RNNLM rescoring for the librispeech recipe

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4606 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8

Parent: 3eccc58810
Commit: d22872a517

@@ -160,3 +160,35 @@
%WER 17.16 [ 8982 / 52343, 855 ins, 1421 del, 6706 sub ] exp/nnet2_online/nnet_a_online/decode_test_other_tgmed/wer_12
%WER 18.90 [ 9891 / 52343, 798 ins, 1786 del, 7307 sub ] exp/nnet2_online/nnet_a_online/decode_test_other_tgsmall/wer_13

# RNNLM rescoring of tri6b

%WER 7.50 [ 4080 / 54402, 617 ins, 416 del, 3047 sub ] exp/tri6b/decode_tglarge_dev_clean/wer_14
%WER 7.09 [ 3859 / 54402, 611 ins, 354 del, 2894 sub ] exp/tri6b/decode_tglarge_dev_clean_rnnlm_h150_me5-1000_L0.25/wer_14
%WER 7.29 [ 3968 / 54402, 661 ins, 332 del, 2975 sub ] exp/tri6b/decode_tglarge_dev_clean_rnnlm_h150_me5-1000_L0.5/wer_13
%WER 7.73 [ 4205 / 54402, 709 ins, 349 del, 3147 sub ] exp/tri6b/decode_tglarge_dev_clean_rnnlm_h150_me5-1000_L0.75/wer_12

%WER 21.94 [ 11180 / 50948, 1264 ins, 1506 del, 8410 sub ] exp/tri6b/decode_tglarge_dev_other/wer_16
%WER 21.36 [ 10881 / 50948, 1309 ins, 1362 del, 8210 sub ] exp/tri6b/decode_tglarge_dev_other_rnnlm_h150_me5-1000_L0.25/wer_16
%WER 21.29 [ 10848 / 50948, 1330 ins, 1324 del, 8194 sub ] exp/tri6b/decode_tglarge_dev_other_rnnlm_h150_me5-1000_L0.5/wer_16
%WER 21.75 [ 11082 / 50948, 1351 ins, 1346 del, 8385 sub ] exp/tri6b/decode_tglarge_dev_other_rnnlm_h150_me5-1000_L0.75/wer_17

%WER 9.39 [ 5106 / 54402, 597 ins, 648 del, 3861 sub ] exp/tri6b/decode_tgmed_dev_clean/wer_14
%WER 8.09 [ 4400 / 54402, 564 ins, 517 del, 3319 sub ] exp/tri6b/decode_tgmed_dev_clean_rnnlm_h150_me5-1000_L0.25/wer_15
%WER 8.00 [ 4350 / 54402, 609 ins, 472 del, 3269 sub ] exp/tri6b/decode_tgmed_dev_clean_rnnlm_h150_me5-1000_L0.5/wer_15
%WER 8.21 [ 4467 / 54402, 692 ins, 415 del, 3360 sub ] exp/tri6b/decode_tgmed_dev_clean_rnnlm_h150_me5-1000_L0.75/wer_12

%WER 25.16 [ 12816 / 50948, 1175 ins, 2076 del, 9565 sub ] exp/tri6b/decode_tgmed_dev_other/wer_16
%WER 23.28 [ 11861 / 50948, 1289 ins, 1546 del, 9026 sub ] exp/tri6b/decode_tgmed_dev_other_rnnlm_h150_me5-1000_L0.25/wer_14
%WER 23.03 [ 11732 / 50948, 1341 ins, 1467 del, 8924 sub ] exp/tri6b/decode_tgmed_dev_other_rnnlm_h150_me5-1000_L0.5/wer_14
%WER 23.12 [ 11779 / 50948, 1351 ins, 1476 del, 8952 sub ] exp/tri6b/decode_tgmed_dev_other_rnnlm_h150_me5-1000_L0.75/wer_15

%WER 10.66 [ 5800 / 54402, 558 ins, 854 del, 4388 sub ] exp/tri6b/decode_tgsmall_dev_clean/wer_15
%WER 8.78 [ 4779 / 54402, 586 ins, 588 del, 3605 sub ] exp/tri6b/decode_tgsmall_dev_clean_rnnlm_h150_me5-1000_L0.25/wer_14
%WER 8.50 [ 4624 / 54402, 661 ins, 505 del, 3458 sub ] exp/tri6b/decode_tgsmall_dev_clean_rnnlm_h150_me5-1000_L0.5/wer_13
%WER 8.56 [ 4659 / 54402, 674 ins, 485 del, 3500 sub ] exp/tri6b/decode_tgsmall_dev_clean_rnnlm_h150_me5-1000_L0.75/wer_13

%WER 27.18 [ 13850 / 50948, 1192 ins, 2340 del, 10318 sub ] exp/tri6b/decode_tgsmall_dev_other/wer_15
%WER 24.72 [ 12596 / 50948, 1291 ins, 1803 del, 9502 sub ] exp/tri6b/decode_tgsmall_dev_other_rnnlm_h150_me5-1000_L0.25/wer_14
%WER 24.18 [ 12317 / 50948, 1284 ins, 1732 del, 9301 sub ] exp/tri6b/decode_tgsmall_dev_other_rnnlm_h150_me5-1000_L0.5/wer_15
%WER 24.19 [ 12323 / 50948, 1327 ins, 1686 del, 9310 sub ] exp/tri6b/decode_tgsmall_dev_other_rnnlm_h150_me5-1000_L0.75/wer_15

@@ -23,7 +23,7 @@ tmpdir=data/local/lm_tmp
lexicon=data/local/lang_tmp/lexiconp.txt
mkdir -p $tmpdir

-for lm_suffix in tgsmall tgmed; do
+for lm_suffix in tgsmall tgmed tglarge; do
  test=data/lang_test_${lm_suffix}
  mkdir -p $test
  for f in phones.txt words.txt phones.txt L.fst L_disambig.fst phones/; do

@@ -0,0 +1,86 @@
#!/bin/bash

# Copyright 2014 Yandex (Author: Ilya Edrenkin)
# Apache 2.0

# Begin configuration section.
hidden=150
maxent_order=5
maxent_size=1000
num_threads=16
stage=0
# End configuration section.

echo "$0 $@" # Print the command line for logging

. path.sh
. utils/parse_options.sh

set -e

if [ $# -ne 2 ]; then
  echo "Usage: $0 <data-dir> <lm-dir>"
  echo "e.g.: $0 /export/a15/vpanayotov/data/lm data/local/lm"
  echo ", where:"
  echo "  <data-dir> is the directory in which the text corpus is downloaded"
  echo "  <lm-dir> is the directory in which the language model is stored"
  echo "Main options:"
  echo "  --hidden <int>        # default 150. Hidden layer size"
  echo "  --maxent-order <int>  # default 5. Maxent features order size"
  echo "  --maxent-size <int>   # default 1000. Maxent features hash size"
  echo "  --num-threads <int>   # default 16. Number of concurrent threads to train RNNLM"
  echo "  --stage <int>         # 1 to download and prepare data, 2 to train RNNLM, 3 to rescore tri6b with a trained RNNLM"
  exit 1
fi

s5_dir=`pwd`
data_dir=`readlink -f $1`
lm_dir=`readlink -f $2`
rnnlm_ver=rnnlm-hs-0.1b # Could probably be made an option, but Tomas Mikolov's original RNNLM would take too long to train on a 200K vocab
rnnlmdir=data/lang_rnnlm_h${hidden}_me${maxent_order}-${maxent_size}
export PATH=$KALDI_ROOT/tools/$rnnlm_ver:$PATH

if [ $stage -le 1 ]; then
  echo "$0: Prepare training data for RNNLM"
  cd $data_dir
  wget http://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz
  gunzip librispeech-lm-norm.txt.gz
  $s5_dir/utils/filt.py $lm_dir/librispeech-vocab.txt librispeech-lm-norm.txt | shuf > librispeech-lm-norm.train.txt
  $s5_dir/utils/filt.py $lm_dir/librispeech-vocab.txt <(awk '{$1=""; print $0}' $s5_dir/data/train_960/text) > librispeech-lm-norm.dev.txt
  rm librispeech-lm-norm.txt
  cd $s5_dir
fi

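Stage 1 builds the RNNLM training set from the normalized LM corpus (vocabulary-filtered and shuffled) and uses the 960h training transcripts, with the utterance-id column stripped by awk, as the validation set. For readers less familiar with the awk '{$1=""; print $0}' idiom, an equivalent sketch in Python (reads from stdin; illustrative only):

import sys
# Drop the first (utterance-id) column, like awk '{$1=""; print $0}'.
# awk leaves a leading field separator where $1 used to be, hence the
# leading space here as well.
for line in sys.stdin:
    fields = line.split()
    print(" " + " ".join(fields[1:]))
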
if [ $stage -le 2 ]; then
  echo "$0: Training RNNLM. It will probably take several hours."
  cd $KALDI_ROOT/tools
  if [ -f $rnnlm_ver/rnnlm ]; then
    echo "Not installing the rnnlm toolkit since it is already there."
  else
    extras/install_rnnlm_hs.sh
  fi
  cd $s5_dir
  mkdir -p $rnnlmdir
  rnnlm -rnnlm $rnnlmdir/rnnlm -train $data_dir/librispeech-lm-norm.train.txt -valid $data_dir/librispeech-lm-norm.dev.txt \
    -threads $num_threads -hidden $hidden -direct-order $maxent_order -direct $maxent_size -retry 1 -stop 1.0
  touch $rnnlmdir/unk.probs
  awk '{print $1}' $rnnlmdir/rnnlm > $rnnlmdir/wordlist.rnn
fi

if [ $stage -le 3 ]; then
  echo "$0: Performing RNNLM rescoring on tri6b decoding results"
  for lm in tgsmall tgmed tglarge; do
    for devset in dev_clean dev_other; do
      sourcedir=exp/tri6b/decode_${lm}_${devset}
      resultsdir=${sourcedir}_rnnlm_h${hidden}_me${maxent_order}-${maxent_size}
      steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b --N 100 0.5 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.5
      cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.25
      cp -r ${resultsdir}_L0.5 ${resultsdir}_L0.75
      steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b --N 100 --stage 7 0.25 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.25
      steps/rnnlmrescore.sh --rnnlm_ver rnnlm-hs-0.1b --N 100 --stage 7 0.75 data/lang_test_$lm $rnnlmdir data/$devset $sourcedir ${resultsdir}_L0.75
    done
  done
fi

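In stage 3 the expensive part of steps/rnnlmrescore.sh (n-best extraction and RNNLM scoring) is run once with interpolation weight 0.5; the directory is then copied and the script is re-run from --stage 7 so that only the final reweighting is redone for weights 0.25 and 0.75. Those weights are the _L0.25 / _L0.5 / _L0.75 suffixes in the RESULTS entries above. A minimal sketch of this kind of n-best reweighting, assuming a linear interpolation of the two LM costs (toy data and names, not Kaldi's internals):

def pick_best(nbest, rnnlm_weight):
    # nbest: list of (acoustic_cost, old_lm_cost, rnnlm_cost, hypothesis);
    # only the LM interpolation changes per weight, the n-best scores are reused.
    def total(entry):
        acoustic, old_lm, rnn, _ = entry
        return acoustic + (1.0 - rnnlm_weight) * old_lm + rnnlm_weight * rnn
    return min(nbest, key=total)[3]

nbest = [(10.0, 6.0, 7.5, "the cat sat"),
         (10.2, 6.5, 6.8, "the cats sat")]
for w in (0.25, 0.5, 0.75):
    print(w, pick_best(nbest, w))
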
@@ -258,6 +258,10 @@ steps/train_quick.sh --cmd "$train_cmd" \
  done
)&

# Perform RNNLM rescoring of tri6b
# Attention: with default settings this requires 4 GB of memory per rescoring job, so it is commented out by default
# local/run_rnnlm.sh $data data/local/lm

# train NN models on the entire dataset
local/nnet2/run_7a_960.sh || exit 1

@@ -0,0 +1,14 @@
#!/usr/bin/env python

# Apache 2.0

import sys

vocab=set()
with open(sys.argv[1]) as vocabfile:
    for line in vocabfile:
        vocab.add(line.strip())

with open(sys.argv[2]) as textfile:
    for line in textfile:
        print " ".join(map(lambda word: word if word in vocab else '<UNK>', line.strip().split()))

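utils/filt.py replaces every word that is not in the supplied vocabulary file with '<UNK>'. A tiny self-contained illustration of the same mapping on toy data:

# Same mapping as filt.py, on in-memory toy data instead of files.
vocab = {"the", "cat", "sat"}
line = "the cat sat on the mat"
print(" ".join(w if w in vocab else "<UNK>" for w in line.split()))
# -> the cat sat <UNK> the <UNK>
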
@@ -632,6 +632,19 @@ real EvaluateModel(char* filename, int printLoglikes) {
      }
    }
    real logprob = 0.0;

    int maxent_present = maxent_order;
    // Check if we should exclude some ME features that were probably not learned
    for(int order = 0; order < maxent_order && target - order >= 0; ++order) {
      for (d = 0; d < vocab[word].codelen; d++) {
        if (nnet.synMaxent[feature_hashes[order] + vocab[word].point[d]] == 0) {
          // Make the ME hash act as a Bloom filter: if a weight is zero, it was probably never touched by training, so this (and higher-order) n-grams should not be considered for this target.
          maxent_present = order;
          break;
        }
      }
    }

    for (d = 0; d < vocab[word].codelen; d++) {
      // Propagate hidden -> output
      f = 0.0;

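The added block makes the hashed maximum-entropy weights double as a Bloom-filter-style presence check: a weight of exactly zero is taken to mean the feature was never updated during training, so that n-gram order (and everything above it) is excluded for the current target. A small Python sketch of the check (toy data and names, not the real nnet.synMaxent layout):

def usable_maxent_order(weights, feature_hashes, output_nodes, max_order):
    # Return the lowest n-gram order whose hashed weight looks untrained;
    # a zero weight is treated as "never touched", as in the C code above.
    for order in range(max_order):
        if any(weights[feature_hashes[order] + node] == 0.0 for node in output_nodes):
            return order
    return max_order

# Toy example: the order-2 feature lands on an untrained (zero) slot,
# so only orders 0 and 1 are used.
weights = [0.3, -0.1, 0.0, 0.25, 0.7]
print(usable_maxent_order(weights, [0, 1, 2], output_nodes=[0], max_order=3))  # -> 2
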
@@ -639,7 +652,7 @@ real EvaluateModel(char* filename, int printLoglikes) {
      for(int c = 0; c < layer1_size; ++c) {
        f += neu1[layer1_size*(target - 1) + c] * nnet.syn1[l2 + c];
      }
-     for(int order = 0; order < maxent_order && target - order >= 0; ++order) {
+     for(int order = 0; order < maxent_present && target - order >= 0; ++order) {
        f += nnet.synMaxent[feature_hashes[order] + vocab[word].point[d]];
      }
      logprob += log10(1+(vocab[word].code[d] == 1 ? exp(f) : exp(-f)));

@@ -704,14 +717,15 @@ void Sample(int num_sentences, int interactive) {
    }

    long long feature_hashes[MAX_NGRAM_ORDER] = {0};

    if(maxent_order) {
-     for(int order = 0; order < maxent_order && input >= order; ++order) {
+     for(int order = 0; order < maxent_order && input + 1 >= order; ++order) {
        feature_hashes[order] = PRIMES[0]*PRIMES[1];
-       for (int b = 1; b <= order; ++b) feature_hashes[order] += PRIMES[(order*PRIMES[b]+b) % PRIMES_SIZE]*(unsigned long long)(sen[input-b]+1);
+       for (int b = 1; b <= order; ++b) feature_hashes[order] += PRIMES[(order*PRIMES[b]+b) % PRIMES_SIZE]*(unsigned long long)(sen[input+1-b]+1);
        feature_hashes[order] = feature_hashes[order] % (maxent_hash_size - vocab_size);
      }
    }

    int node = vocab_size - 2;
    while(node > 0) {
      // Propagate hidden -> output

@@ -720,7 +734,7 @@ void Sample(int num_sentences, int interactive) {
      for(int c = 0; c < layer1_size; ++c) {
        f += neu1[input*layer1_size + c] * nnet.syn1[l2 + c];
      }
-     for(int order = 0; order < maxent_order && input >= order; ++order) {
+     for(int order = 0; order < maxent_order && input + 1 >= order; ++order) {
        f += nnet.synMaxent[feature_hashes[order] + node];
      }
      f = exp(f)/(1+exp(f)); // sigmoid

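The two Sample() hunks above appear to fix an off-by-one: the history for the hashed maxent features is now read relative to position input + 1 (sen[input+1-b] instead of sen[input-b]), matching the relaxed condition input + 1 >= order. A rough Python sketch of this style of hash-based n-gram context features (toy primes and hash size, not the real PRIMES table):

MAX_NGRAM_ORDER = 3
PRIMES = [2, 3, 5, 7, 11, 13]   # illustrative stand-in for the real table
PRIMES_SIZE = len(PRIMES)
HASH_SIZE = 1000

def context_feature_hashes(sen, pos):
    # One hash bucket per n-gram order; order 0 uses no history,
    # order k hashes the k most recent word ids sen[pos], ..., sen[pos-k+1].
    hashes = []
    for order in range(MAX_NGRAM_ORDER):
        if pos + 1 < order:   # not enough history for this order
            break
        h = PRIMES[0] * PRIMES[1]
        for b in range(1, order + 1):
            h += PRIMES[(order * PRIMES[b] + b) % PRIMES_SIZE] * (sen[pos + 1 - b] + 1)
        hashes.append(h % HASH_SIZE)
    return hashes

# Word ids of a short sentence prefix; features for predicting the word
# that follows position 2.
print(context_feature_hashes([5, 17, 42], pos=2))
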