зеркало из https://github.com/mozilla/kaldi.git
sandbox/lid: Extending scripts to run on test data.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3641 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
c437252251
Коммит
c2397e1d3b
|
@ -58,3 +58,8 @@ lid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \
|
|||
|
||||
lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \
|
||||
exp/extractor_2048 data/train exp/ivectors_train
|
||||
|
||||
lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \
|
||||
exp/extractor_2048 data/test exp/ivectors_test
|
||||
|
||||
|
||||
|
|
|
@ -38,11 +38,11 @@ logistic-regression-eval $model "ark:$trials" scp:$train_ivectors "$scores"
|
|||
|
||||
logistic-regression-eval $model scp:$train_ivectors ark,t:- | \
|
||||
awk '{max=$3; argmax=3; for(f=3;f<NF;f++) { if ($f>max) { max=$f; argmax=f; }} print $1, (argmax - 3); }' | \
|
||||
utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_train/train_output
|
||||
utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_train/output
|
||||
|
||||
# note: we treat each utterance's language label as a one-word sentence, so the WER/SER
|
||||
# corresponds to the recognition error rate.
|
||||
compute-wer --text ark:data/train/utt2lang ark:exp/ivectors_train/train_output
|
||||
compute-wer --text ark:data/train/utt2lang ark:exp/ivectors_train/output
|
||||
|
||||
# It perfectly classifies the training data:
|
||||
#%WER 0.00 [ 0 / 10173, 0 ins, 0 del, 0 sub ]
|
||||
|
@ -50,4 +50,18 @@ compute-wer --text ark:data/train/utt2lang ark:exp/ivectors_train/train_output
|
|||
#Scored 10173 sentences, 0 not present in hyp.
|
||||
|
||||
|
||||
logistic-regression-eval $model scp:exp/ivectors_test/ivector.scp ark,t:- | \
|
||||
awk '{max=$3; argmax=3; for(f=3;f<NF;f++) { if ($f>max) { max=$f; argmax=f; }} print $1, (argmax - 3); }' | \
|
||||
utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_test/output
|
||||
|
||||
|
||||
# someone needs to extend this to run on the dev data.
|
||||
|
||||
compute-wer --text ark:data/test/utt2lang ark:exp/ivectors_test/output
|
||||
|
||||
#compute-wer --text ark:data/test/utt2lang ark:exp/ivectors_test/output
|
||||
#compute-wer --text ark:data/test/utt2lang ark:exp/ivectors_test/output
|
||||
#%WER 2.97 [ 119 / 4000, 0 ins, 0 del, 119 sub ]
|
||||
#%SER 2.97 [ 119 / 4000 ]
|
||||
#Scored 4000 sentences, 0 not present in hyp.
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче