Script changes and updating RESULTS files.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@45 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2011-05-18 22:50:00 +00:00
Родитель fb0e51410b
Коммит 9d5dc7f9e9
11 изменённых файлов: 159 добавлений и 42 удалений

Просмотреть файл

@ -110,7 +110,6 @@ while [ $x -lt $numiters ]; do
compose-transforms --print-args=false $dir/$x.mat.new $cur_lda $dir/$x.mat || exit 1; compose-transforms --print-args=false $dir/$x.mat.new $cur_lda $dir/$x.mat || exit 1;
cur_lda=$dir/$x.mat cur_lda=$dir/$x.mat
feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats $cur_lda ark:- ark:-|" feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats $cur_lda ark:- ark:-|"
# Subset of features used to train MLLT transforms. # Subset of features used to train MLLT transforms.
featsub="ark:scripts/subset_scp.pl 800 data/train.scp | splice-feats scp:- ark:- | transform-feats $cur_lda ark:- ark:-|" featsub="ark:scripts/subset_scp.pl 800 data/train.scp | splice-feats scp:- ark:- | transform-feats $cur_lda ark:- ark:-|"

Просмотреть файл

@ -0,0 +1,79 @@
Note on baselines:
--
Baseline 1:
"Robust Decision Tree State Tying for Continuous Speech Recognition",
Wolfgang Reichl and Wu Chou, IEEE Trans. Speech and Audio Processing (2000),
give the following numbers for gender-independent systems:
Trained on SI-84, tested on Nov'92 (20k open): bigram 14.4%, trigram 12.8%.
Trained on SI-284, 20k open, tested on Nov'92 and '93:
bigram: Nov'92, 11.9%; Nov'93, 15.4% [cf. us (tri3a_bg_eval92, 11.82; tri3a_bg_eval93, 15.00)]
trigram: Nov'92, 9.8%; Nov'93, 13.4%
--
Baseline 2:
"Large Vocabulary Continuous Speech Recognition using HTK", by P. C. Woodland et al.,
ICASSP 1994.
Table 2: gender dependent xwrd system, SI-284 training, 20k test:
bigram: Nov'92, 11.08%; Nov'93, 14.45% [cf. us (tri3a_bg_eval92, 11.82; tri3a_bg_eval93, 15.00)]
trigram: Nov'92, 9.46%; Nov'93, 12.74%
--
exp/decode_mono_tgpr_eval92/wer:%WER 31.38 [ 1770 / 5641, 108 ins, 386 del, 1276 sub ]
exp/decode_tri1_tgpr_eval92/wer:%WER 13.30 [ 750 / 5641, 133 ins, 74 del, 543 sub ]
exp/decode_tri2a_bg_eval92/wer:%WER 14.25 [ 804 / 5641, 146 ins, 87 del, 571 sub ]
exp/decode_tri2a_bg_eval93/wer:%WER 20.97 [ 721 / 3439, 62 ins, 122 del, 537 sub ]
exp/decode_tri2a_tgpr_eval92/wer:%WER 12.52 [ 706 / 5641, 127 ins, 60 del, 519 sub ]
exp/decode_tri2a_tgpr_eval93/wer:%WER 18.29 [ 629 / 3439, 47 ins, 104 del, 478 sub ]
exp/decode_tri2a_tgpr_fmllr_eval92/wer:%WER 11.42 [ 644 / 5641, 116 ins, 60 del, 468 sub ]
exp/decode_tri2a_tgpr_fmllr_utt_eval92/wer:%WER 12.48 [ 704 / 5641, 128 ins, 56 del, 520 sub ]
exp/decode_tri2b_tgpr_eval92/wer:%WER 11.31 [ 638 / 5641, 128 ins, 58 del, 452 sub ]
exp/decode_tri2b_tgpr_eval93/wer:%WER 16.14 [ 555 / 3439, 59 ins, 89 del, 407 sub ]
exp/decode_tri2b_tgpr_fmllr_eval92/wer:%WER 10.25 [ 578 / 5641, 111 ins, 53 del, 414 sub ]
exp/decode_tri2b_tgpr_fmllr_eval93/wer:%WER 14.13 [ 486 / 3439, 46 ins, 84 del, 356 sub ]
exp/decode_tri2b_tgpr_utt_eval92/wer:%WER 11.26 [ 635 / 5641, 122 ins, 62 del, 451 sub ]
exp/decode_tri2b_tgpr_utt_eval93/wer:%WER 15.93 [ 548 / 3439, 56 ins, 91 del, 401 sub ]
exp/decode_tri2b_tgpr_utt_fmllr_eval92/wer:%WER 11.19 [ 631 / 5641, 122 ins, 61 del, 448 sub ]
exp/decode_tri2b_tgpr_utt_fmllr_eval93/wer:%WER 15.85 [ 545 / 3439, 54 ins, 93 del, 398 sub ]
exp/decode_tri2c_tgpr_eval92/wer:%WER 12.71 [ 717 / 5641, 137 ins, 72 del, 508 sub ]
exp/decode_tri2c_tgpr_eval93/wer:%WER 17.01 [ 585 / 3439, 61 ins, 85 del, 439 sub ]
exp/decode_tri2c_tgpr_utt_eval92/wer:%WER 12.96 [ 731 / 5641, 148 ins, 67 del, 516 sub ]
exp/decode_tri2d_tgpr_eval92/wer:%WER 13.03 [ 735 / 5641, 138 ins, 74 del, 523 sub ]
exp/decode_tri2d_tgpr_eval93/wer:%WER 19.40 [ 667 / 3439, 48 ins, 130 del, 489 sub ]
exp/decode_tri2e_tgpr_eval92/wer:%WER 14.29 [ 806 / 5641, 155 ins, 79 del, 572 sub ]
exp/decode_tri2e_tgpr_eval93/wer:%WER 19.08 [ 656 / 3439, 71 ins, 120 del, 465 sub ]
exp/decode_tri2f_tgpr_eval92/wer:%WER 12.23 [ 690 / 5641, 138 ins, 57 del, 495 sub ]
exp/decode_tri2f_tgpr_eval93/wer:%WER 17.74 [ 610 / 3439, 68 ins, 85 del, 457 sub ]
exp/decode_tri2g_tgpr_diag_eval92/wer:%WER 10.65 [ 601 / 5641, 111 ins, 55 del, 435 sub ]
exp/decode_tri2g_tgpr_diag_eval93/wer:%WER 16.49 [ 567 / 3439, 77 ins, 72 del, 418 sub ]
exp/decode_tri2g_tgpr_eval92/wer:%WER 11.08 [ 625 / 5641, 119 ins, 57 del, 449 sub ]
exp/decode_tri2g_tgpr_eval93/wer:%WER 16.40 [ 564 / 3439, 72 ins, 68 del, 424 sub ]
exp/decode_tri2g_tgpr_utt_diag_eval92/wer:%WER 11.10 [ 626 / 5641, 119 ins, 60 del, 447 sub ]
exp/decode_tri2g_tgpr_utt_diag_eval93/wer:%WER 16.08 [ 553 / 3439, 75 ins, 68 del, 410 sub ]
exp/decode_tri2g_tgpr_utt_eval92/wer:%WER 11.19 [ 631 / 5641, 117 ins, 59 del, 455 sub ]
exp/decode_tri2g_tgpr_utt_eval93/wer:%WER 16.17 [ 556 / 3439, 76 ins, 67 del, 413 sub ]
exp/decode_tri2g_tgpr_utt_vtln_diag_eval92/wer:%WER 10.88 [ 614 / 5641, 115 ins, 58 del, 441 sub ]
exp/decode_tri2g_tgpr_utt_vtln_diag_eval93/wer:%WER 17.82 [ 613 / 3439, 73 ins, 99 del, 441 sub ]
exp/decode_tri2g_tgpr_vtln_diag_eval92/wer:%WER 10.88 [ 614 / 5641, 117 ins, 59 del, 438 sub ]
exp/decode_tri2g_tgpr_vtln_diag_eval93/wer:%WER 15.91 [ 547 / 3439, 73 ins, 68 del, 406 sub ]
exp/decode_tri2h_tgpr_eval92/wer:%WER 13.40 [ 756 / 5641, 163 ins, 54 del, 539 sub ]
exp/decode_tri2h_tgpr_eval93/wer:%WER 20.24 [ 696 / 3439, 69 ins, 109 del, 518 sub ]
exp/decode_tri2i_tgpr_eval92/wer:%WER 12.39 [ 699 / 5641, 130 ins, 72 del, 497 sub ]
exp/decode_tri2i_tgpr_eval93/wer:%WER 18.35 [ 631 / 3439, 58 ins, 102 del, 471 sub ]
exp/decode_tri2j_tgpr_eval92/wer:%WER 12.82 [ 723 / 5641, 127 ins, 70 del, 526 sub ]
exp/decode_tri2j_tgpr_eval93/wer:%WER 18.26 [ 628 / 3439, 59 ins, 99 del, 470 sub ]
exp/decode_tri3a_bg_eval92/wer:%WER 11.82 [ 667 / 5641, 132 ins, 63 del, 472 sub ]
exp/decode_tri3a_bg_eval93/wer:%WER 15.00 [ 516 / 3439, 62 ins, 75 del, 379 sub ]
exp/decode_tri3a_tgpr_dfmllr_eval92/wer:%WER 10.51 [ 593 / 5641, 111 ins, 51 del, 431 sub ]
exp/decode_tri3a_tgpr_dfmllr_eval93/wer:%WER 12.68 [ 436 / 3439, 52 ins, 52 del, 332 sub ]
exp/decode_tri3a_tgpr_eval92/wer:%WER 10.67 [ 602 / 5641, 131 ins, 43 del, 428 sub ]
exp/decode_tri3a_tgpr_eval93/wer:%WER 13.84 [ 476 / 3439, 55 ins, 68 del, 353 sub ]
exp/decode_tri3a_tgpr_fmllr_eval92/wer:%WER 9.54 [ 538 / 5641, 114 ins, 47 del, 377 sub ]
exp/decode_tri3a_tgpr_fmllr_eval93/wer:%WER 12.13 [ 417 / 3439, 52 ins, 59 del, 306 sub ]
exp/decode_tri3a_tgpr_uttdfmllr_eval92/wer:%WER 10.58 [ 597 / 5641, 118 ins, 49 del, 430 sub ]
exp/decode_tri3a_tgpr_uttdfmllr_eval93/wer:%WER 13.29 [ 457 / 3439, 49 ins, 57 del, 351 sub ]
exp/decode_tri3a_tgpr_uttfmllr_eval92/wer:%WER 10.44 [ 589 / 5641, 122 ins, 47 del, 420 sub ]
exp/decode_tri3a_tgpr_uttfmllr_eval93/wer:%WER 13.93 [ 479 / 3439, 56 ins, 69 del, 354 sub ]

Просмотреть файл

@ -32,6 +32,8 @@ while(<STDIN>) {
foreach $w (split (" ",$trans)) { foreach $w (split (" ",$trans)) {
$w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary. . $w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary. .
$w =~ s:\\::g; # Remove backslashes. We don't need the quoting. $w =~ s:\\::g; # Remove backslashes. We don't need the quoting.
$w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts.
$w =~ s:^\.POINT$:POINT:; # Normalization for Nov'93 test transcripts.
if($w =~ m:^\[\<\w+\]$: || # E.g. [<door_slam], this means a door slammed in the preceding word. Delete. if($w =~ m:^\[\<\w+\]$: || # E.g. [<door_slam], this means a door slammed in the preceding word. Delete.
$w =~ m:^\[\w+\>\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete. $w =~ m:^\[\w+\>\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete.
$w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon. $w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon.

Просмотреть файл

@ -107,9 +107,10 @@ cp data_prep/train_si284.utt2spk data/train.utt2spk
cp data_prep/spk2gender.map data/ cp data_prep/spk2gender.map data/
for x in eval_nov92 dev_nov93 eval_nov93; do for x in eval_nov92 dev_nov93 eval_nov93; do
cp data_prep/$x.spk2utt data/$x.spk2utt cp data_prep/$x.spk2utt data/
cp data_prep/$x.utt2spk data/$x.utt2spk cp data_prep/$x.utt2spk data/
cp data_prep/$x.txt data/$x.txt cp data_prep/$x.txt data/
cp data_prep/${x}_wav.scp data/
done done
# Get the right paths on our system by sourcing the following shell file # Get the right paths on our system by sourcing the following shell file
@ -194,23 +195,41 @@ steps/train_tri2a.sh || exit 1;
scripts/decode.sh exp/decode_tri2a_tgpr_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a.sh data/eval_nov92.scp scripts/decode.sh exp/decode_tri2a_tgpr_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri2a_tgpr_eval93 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a.sh data/eval_nov93.scp )& scripts/decode.sh exp/decode_tri2a_tgpr_eval93 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a.sh data/eval_nov93.scp )&
# also doing tri2a with bigram
(
scripts/mkgraph.sh data/G_bg.fst exp/tri2a/tree exp/tri2a/final.mdl exp/graph_tri2a_bg || exit 1;
scripts/decode.sh exp/decode_tri2a_bg_eval92 exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri2a_bg_eval93 exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov93.scp
)&
( scripts/decode.sh exp/decode_tri2a_tgpr_fmllr_utt_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp ( scripts/decode.sh exp/decode_tri2a_tgpr_fmllr_utt_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp
scripts/decode.sh --per-spk exp/decode_tri2a_tgpr_fmllr_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp )& scripts/decode.sh --per-spk exp/decode_tri2a_tgpr_fmllr_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp )&
steps/train_tri3a.sh || exit 1; steps/train_tri3a.sh || exit 1;
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri3a/tree exp/tri3a/final.mdl exp/graph_tri3a_tg_pruned || exit 1; (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri3a/tree exp/tri3a/final.mdl exp/graph_tri3a_tg_pruned || exit 1;
scripts/decode.sh exp/decode_tri3a_tgpr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a.sh data/eval_nov92.scp for year in 92 93; do
scripts/decode.sh exp/decode_tri3a_tgpr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a.sh data/eval_nov${year}.scp
# per-speaker fMLLR # per-speaker fMLLR
scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_fmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov92.scp scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_fmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov${year}.scp
# per-utterance fMLLR # per-utterance fMLLR
scripts/decode.sh exp/decode_tri3a_tgpr_uttfmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov92.scp scripts/decode.sh exp/decode_tri3a_tgpr_uttfmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov${year}.scp
# per-speaker diagonal fMLLR # per-speaker diagonal fMLLR
scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_dfmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov92.scp scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_dfmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov${year}.scp
# per-utterance diagonal fMLLR # per-utterance diagonal fMLLR
scripts/decode.sh exp/decode_tri3a_tgpr_uttdfmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov92.scp scripts/decode.sh exp/decode_tri3a_tgpr_uttdfmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov${year}.scp
done
)& )&
# also doing tri3a with bigram
(
scripts/mkgraph.sh data/G_bg.fst exp/tri3a/tree exp/tri3a/final.mdl exp/graph_tri3a_bg || exit 1;
scripts/decode.sh exp/decode_tri3a_bg_eval92 exp/graph_tri3a_bg/HCLG.fst steps/decode_tri3a.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri3a_bg_eval93 exp/graph_tri3a_bg/HCLG.fst steps/decode_tri3a.sh data/eval_nov93.scp
)&
# will delete: # will delete:
## scripts/decode_queue_fmllr.sh exp/graph_tri3a_tg_pruned exp/tri3a/final.mdl exp/decode_tri3a_tg_pruned_fmllr & ## scripts/decode_queue_fmllr.sh exp/graph_tri3a_tg_pruned exp/tri3a/final.mdl exp/decode_tri3a_tg_pruned_fmllr &
@ -223,6 +242,12 @@ steps/train_tri2b.sh
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov92.scp scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri2b_tgpr_utt_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov93.scp
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov93.scp
scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov93.scp
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov93.scp
) & ) &
# Cepstral Mean Normalization (CMN) # Cepstral Mean Normalization (CMN)
@ -250,35 +275,47 @@ steps/train_tri2e.sh
# Splice+LDA+MLLT # Splice+LDA+MLLT
steps/train_tri2f.sh steps/train_tri2f.sh
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2f/tree exp/tri2f/final.mdl exp/graph_tri2f_tg_pruned || exit 1; (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2f/tree exp/tri2f/final.mdl exp/graph_tri2f_tg_pruned || exit 1;
scripts/decode.sh exp/decode_tri2f_tgpr_eval92 exp/graph_tri2f_tg_pruned/HCLG.fst steps/decode_tri2f.sh data/eval_nov92.scp )& scripts/decode.sh exp/decode_tri2f_tgpr_eval92 exp/graph_tri2f_tg_pruned/HCLG.fst steps/decode_tri2f.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri2f_tgpr_eval93 exp/graph_tri2f_tg_pruned/HCLG.fst steps/decode_tri2f.sh data/eval_nov93.scp
)&
# Linear VTLN (+ regular VTLN) # Linear VTLN (+ regular VTLN)
steps/train_tri2g.sh steps/train_tri2g.sh
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2g/tree exp/tri2g/final.mdl exp/graph_tri2g_tg_pruned || exit 1; (
scripts/decode.sh exp/decode_tri2g_tgpr_utt_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov92.scp scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2g/tree exp/tri2g/final.mdl exp/graph_tri2g_tg_pruned || exit 1;
scripts/decode.sh exp/decode_tri2g_tgpr_utt_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov92.scp
scripts/decode.sh --wav exp/decode_tri2g_tgpr_utt_vtln_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov92.scp
scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov92.scp for year in 92 93; do
scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov92.scp scripts/decode.sh exp/decode_tri2g_tgpr_utt_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov${year}.scp
scripts/decode.sh --wav --per-spk exp/decode_tri2g_tgpr_vtln_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov92.scp scripts/decode.sh exp/decode_tri2g_tgpr_utt_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov${year}.scp
scripts/decode.sh --wav exp/decode_tri2g_tgpr_utt_vtln_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov${year}.scp
scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov${year}.scp
scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov${year}.scp
scripts/decode.sh --wav --per-spk exp/decode_tri2g_tgpr_vtln_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov${year}.scp
done
)& )&
# Splice+HLDA # Splice+HLDA
steps/train_tri2h.sh steps/train_tri2h.sh
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2h/tree exp/tri2h/final.mdl exp/graph_tri2h_tg_pruned || exit 1; (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2h/tree exp/tri2h/final.mdl exp/graph_tri2h_tg_pruned || exit 1;
scripts/decode.sh exp/decode_tri2h_tgpr_eval92 exp/graph_tri2h_tg_pruned/HCLG.fst steps/decode_tri2h.sh data/eval_nov92.scp )& scripts/decode.sh exp/decode_tri2h_tgpr_eval92 exp/graph_tri2h_tg_pruned/HCLG.fst steps/decode_tri2h.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri2h_tgpr_eval93 exp/graph_tri2h_tg_pruned/HCLG.fst steps/decode_tri2h.sh data/eval_nov93.scp
)&
# Triple-deltas + HLDA # Triple-deltas + HLDA
steps/train_tri2i.sh steps/train_tri2i.sh
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2i/tree exp/tri2i/final.mdl exp/graph_tri2i_tg_pruned || exit 1; (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2i/tree exp/tri2i/final.mdl exp/graph_tri2i_tg_pruned || exit 1;
scripts/decode.sh exp/decode_tri2i_tgpr_eval92 exp/graph_tri2i_tg_pruned/HCLG.fst steps/decode_tri2i.sh data/eval_nov92.scp )& scripts/decode.sh exp/decode_tri2i_tgpr_eval92 exp/graph_tri2i_tg_pruned/HCLG.fst steps/decode_tri2i.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri2i_tgpr_eval93 exp/graph_tri2i_tg_pruned/HCLG.fst steps/decode_tri2i.sh data/eval_nov93.scp
)&
# Splice + HLDA # Splice + HLDA
steps/train_tri2j.sh steps/train_tri2j.sh
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2j/tree exp/tri2j/final.mdl exp/graph_tri2j_tg_pruned || exit 1; (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2j/tree exp/tri2j/final.mdl exp/graph_tri2j_tg_pruned || exit 1;
scripts/decode.sh exp/decode_tri2j_tgpr_eval92 exp/graph_tri2j_tg_pruned/HCLG.fst steps/decode_tri2j.sh data/eval_nov92.scp )& scripts/decode.sh exp/decode_tri2j_tgpr_eval92 exp/graph_tri2j_tg_pruned/HCLG.fst steps/decode_tri2j.sh data/eval_nov92.scp
scripts/decode.sh exp/decode_tri2j_tgpr_eval93 exp/graph_tri2j_tg_pruned/HCLG.fst steps/decode_tri2j.sh data/eval_nov93.scp
)&
# LDA+ET # LDA+ET
@ -307,4 +344,5 @@ steps/train_tri2l.sh
# For an e.g. of scoring with sclite: do e.g. # For an e.g. of scoring with sclite: do e.g.
# scripts/score_sclite.sh exp/decode_tri2a_tg_pruned # scripts/score_sclite.sh exp/decode_tri2a_tgpr_eval92 data/eval_nov92.txt
# cat exp/decode_tri2a_tgpr_eval92/scoring/hyp.sys

Просмотреть файл

@ -14,8 +14,7 @@
# See the Apache 2 License for the specific language governing permissions and # See the Apache 2 License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
. path.sh
reorder=true # Dan-style, make false for Mirko+Lukas's decoder. reorder=true # Dan-style, make false for Mirko+Lukas's decoder.
@ -55,10 +54,8 @@ loopscale=0.1
tscale=1.0 tscale=1.0
if [ "$nopath" != "1" ]; then if [ "$nopath" != "1" ]; then
export PATH=$PATH:../src/fstbin/:../src/bin:../openfst-1.2/src/bin/ . path.sh
fi fi
mkdir -p $dir mkdir -p $dir

Просмотреть файл

@ -17,29 +17,36 @@
# Does the sclite version of scoring in decode directories. # Does the sclite version of scoring in decode directories.
if [ $# != 1 ]; then if [ $# != 2 ]; then
echo "Usage: scripts/score_sclite.sh <decode-dir>" echo "Usage: scripts/score_sclite.sh <decode-dir> <ref>"
exit 1; exit 1;
fi fi
sclite=../tools/sctk-2.4.0/bin/sclite sclite=../../../tools/sctk-2.4.0/bin/sclite
if [ ! -f $sclite ]; then if [ ! -f $sclite ]; then
echo "The sclite program is not there. Follow the INSTALL instructions in ../tools"; echo "The sclite program is not there. Follow the INSTALL instructions in ../../../tools";
exit 1; exit 1;
fi fi
dir=$1 dir=$1
ref=$2
if [ ! -f "$ref" ]; then
echo "Reference file $ref is not there"
exit 1
fi
scoredir=$dir/scoring scoredir=$dir/scoring
mkdir $scoredir mkdir $scoredir
cat $dir/test?*.tra | \ cat $dir/*.tra | \
scripts/int2sym.pl --ignore-first-field data/words.txt | \ scripts/int2sym.pl --ignore-first-field data/words.txt | \
sed 's:<s>::' | sed 's:</s>::' | sed 's:<UNK>::g' | \ sed 's:<s>::' | sed 's:</s>::' | sed 's:<UNK>::g' | \
scripts/transcript2hyp.pl > $scoredir/hyp scripts/transcript2hyp.pl > $scoredir/hyp
cat data/test_trans.txt | scripts/transcript2hyp.pl | sed 's:<NOISE>::g' | \ cat $ref | scripts/transcript2hyp.pl | sed 's:<NOISE>::g' | \
sed 's:<SPOKEN_NOISE>::g' > $scoredir/ref sed 's:<SPOKEN_NOISE>::g' > $scoredir/ref
$sclite -r $scoredir/ref trn -h $scoredir/hyp trn -i wsj -o all -o dtl $sclite -r $scoredir/ref trn -h $scoredir/hyp trn -i wsj -o all -o dtl

Просмотреть файл

@ -24,8 +24,8 @@
@section about_what What is Kaldi? @section about_what What is Kaldi?
Kaldi is a toolkit for speech recognition written in C++ and (to be) released Kaldi is a toolkit for speech recognition written in C++ and licensed under
under the open source Apache license. Kaldi is intended for use by speech the Apache License v2.0. Kaldi is intended for use by speech
recognition researchers. For more detailed history and list of contributors see recognition researchers. For more detailed history and list of contributors see
\ref history. \ref history.

Просмотреть файл

@ -74,9 +74,8 @@ namespace kaldi {
KALDI_ASSERT(ApproxEqual(delta, objf_change) && "Probable coding error in optimization"); KALDI_ASSERT(ApproxEqual(delta, objf_change) && "Probable coding error in optimization");
\endcode \endcode
If compiled normally asserts will get checked, but not if compiled with NDEBUG If compiled normally asserts will get checked, but not if compiled with NDEBUG defined.
(with the current \ref build_setup "build setup", the NDEBUG string will be defined For inner-loop assertions that use a lot of CPU,
if compiled with make DEBUGLEVEL=0). For inner-loop assertions that use a lot of CPU,
we use the following pattern: we use the following pattern:
\code \code
#ifdef KALDI_PARANOID #ifdef KALDI_PARANOID

Просмотреть файл

@ -31,7 +31,7 @@
\mainpage Kaldi \mainpage Kaldi
(see also Kaldi's <a href=http://sourceforge.net/projects/kaldi/> project page on Sourceforge </a>) (see also Kaldi's <a href=http://sourceforge.net/projects/kaldi/> project page on Sourceforge </a>)
<p>
- \ref about - \ref about
- \ref install - \ref install
- \ref build_setup - \ref build_setup

Просмотреть файл

@ -32,7 +32,6 @@ int main(int argc, char *argv[])
// construct all the global objects // construct all the global objects
ParseOptions po(usage); ParseOptions po(usage);
MfccOptions mfcc_opts; MfccOptions mfcc_opts;
bool binary = true;
bool subtract_mean = false; bool subtract_mean = false;
BaseFloat vtln_warp = 1.0; BaseFloat vtln_warp = 1.0;
std::string vtln_map_rspecifier; std::string vtln_map_rspecifier;
@ -44,7 +43,6 @@ int main(int argc, char *argv[])
// Register the options // Register the options
po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]"); po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]");
po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]; not recommended to do it this way. "); po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]; not recommended to do it this way. ");
po.Register("binary", &binary, "Write output in binary mode");
po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)"); po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)");
po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)"); po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)");
po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)"); po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)");

Просмотреть файл

@ -32,7 +32,6 @@ int main(int argc, char *argv[])
// construct all the global objects // construct all the global objects
ParseOptions po(usage); ParseOptions po(usage);
PlpOptions plp_opts; PlpOptions plp_opts;
bool binary = true;
bool subtract_mean = false; bool subtract_mean = false;
BaseFloat vtln_warp = 1.0; BaseFloat vtln_warp = 1.0;
std::string vtln_map_rspecifier; std::string vtln_map_rspecifier;
@ -44,7 +43,6 @@ int main(int argc, char *argv[])
// Register the options // Register the options
po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]"); po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]");
po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]. "); po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]. ");
po.Register("binary", &binary, "Write output in binary mode");
po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)"); po.Register("vtln-warp", &vtln_warp, "Vtln warp factor"); po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)"); po.Register("vtln-warp", &vtln_warp, "Vtln warp factor");
po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)"); po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)");
po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)"); po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)");