sandbox/lid: Extending scripts to run on test data.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3641 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2014-03-01 00:19:06 +00:00
Родитель c437252251
Коммит c2397e1d3b
2 изменённых файла: 21 добавление и 2 удаления

Просмотреть файл

@@ -58,3 +58,8 @@ lid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \
lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \
exp/extractor_2048 data/train exp/ivectors_train
lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \
exp/extractor_2048 data/test exp/ivectors_test

Просмотреть файл

@@ -38,11 +38,11 @@ logistic-regression-eval $model "ark:$trials" scp:$train_ivectors "$scores"
logistic-regression-eval $model scp:$train_ivectors ark,t:- | \
awk '{max=$3; argmax=3; for(f=3;f<NF;f++) { if ($f>max) { max=$f; argmax=f; }} print $1, (argmax - 3); }' | \
utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_train/train_output
utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_train/output
# note: we treat the language as a sentence; it happens that the WER/SER
# corresponds to the recognition error rate.
compute-wer --text ark:data/train/utt2lang ark:exp/ivectors_train/train_output
compute-wer --text ark:data/train/utt2lang ark:exp/ivectors_train/output
# It perfectly classifies the training data:
#%WER 0.00 [ 0 / 10173, 0 ins, 0 del, 0 sub ]
@@ -50,4 +50,18 @@ compute-wer --text ark:data/train/utt2lang ark:exp/ivectors_train/train_output
#Scored 10173 sentences, 0 not present in hyp.
logistic-regression-eval $model scp:exp/ivectors_test/ivector.scp ark,t:- | \
awk '{max=$3; argmax=3; for(f=3;f<NF;f++) { if ($f>max) { max=$f; argmax=f; }} print $1, (argmax - 3); }' | \
utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_test/output
# someone needs to extend this to run on the dev data.
compute-wer --text ark:data/test/utt2lang ark:exp/ivectors_test/output
#compute-wer --text ark:data/test/utt2lang ark:exp/ivectors_test/output
#compute-wer --text ark:data/test/utt2lang ark:exp/ivectors_test/output
#%WER 2.97 [ 119 / 4000, 0 ins, 0 del, 119 sub ]
#%SER 2.97 [ 119 / 4000 ]
#Scored 4000 sentences, 0 not present in hyp.