зеркало из https://github.com/mozilla/kaldi.git
Script changes and updating RESULTS files.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@45 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
fb0e51410b
Коммит
9d5dc7f9e9
|
@ -110,7 +110,6 @@ while [ $x -lt $numiters ]; do
|
|||
compose-transforms --print-args=false $dir/$x.mat.new $cur_lda $dir/$x.mat || exit 1;
|
||||
cur_lda=$dir/$x.mat
|
||||
|
||||
|
||||
feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats $cur_lda ark:- ark:-|"
|
||||
# Subset of features used to train MLLT transforms.
|
||||
featsub="ark:scripts/subset_scp.pl 800 data/train.scp | splice-feats scp:- ark:- | transform-feats $cur_lda ark:- ark:-|"
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
|
||||
Note on baselines:
|
||||
--
|
||||
Baseline 1:
|
||||
"Robust Decision Tree State Tying for Continuous Speech Recognition",
|
||||
Wolfgang Reichl and Wu Chou, IEEE Trans. Speech and Audio Processing (2000),
|
||||
give the following numbers for gender-independent systems:
|
||||
|
||||
Trained on SI-84, tested on Nov'92 (20k open): bigram 14.4%, trigram 12.8%.
|
||||
|
||||
Trained on SI-284, 20k open, tested on Nov'92 and '93:
|
||||
bigram: Nov'92, 11.9%; Nov'93, 15.4% [cf. us (tri3a_bg_eval92, 11.82; tri3a_bg_eval93, 15.00)]
|
||||
trigram: Nov'92, 9.8%; Nov'93, 13.4%
|
||||
--
|
||||
Baseline 2:
|
||||
"Large Vocabulary Continuous Speech Recognition using HTK", by P. C. Woodland et al.,
|
||||
ICASSP 1994.
|
||||
|
||||
Table 2: gender dependent xwrd system, SI-284 training, 20k test:
|
||||
bigram: Nov'92, 11.08%; Nov'93, 14.45% [cf. us (tri3a_bg_eval92, 11.82; tri3a_bg_eval93, 15.00)]
|
||||
trigram: Nov'92, 9.46%; Nov'93, 12.74%
|
||||
|
||||
--
|
||||
|
||||
exp/decode_mono_tgpr_eval92/wer:%WER 31.38 [ 1770 / 5641, 108 ins, 386 del, 1276 sub ]
|
||||
exp/decode_tri1_tgpr_eval92/wer:%WER 13.30 [ 750 / 5641, 133 ins, 74 del, 543 sub ]
|
||||
exp/decode_tri2a_bg_eval92/wer:%WER 14.25 [ 804 / 5641, 146 ins, 87 del, 571 sub ]
|
||||
exp/decode_tri2a_bg_eval93/wer:%WER 20.97 [ 721 / 3439, 62 ins, 122 del, 537 sub ]
|
||||
exp/decode_tri2a_tgpr_eval92/wer:%WER 12.52 [ 706 / 5641, 127 ins, 60 del, 519 sub ]
|
||||
exp/decode_tri2a_tgpr_eval93/wer:%WER 18.29 [ 629 / 3439, 47 ins, 104 del, 478 sub ]
|
||||
exp/decode_tri2a_tgpr_fmllr_eval92/wer:%WER 11.42 [ 644 / 5641, 116 ins, 60 del, 468 sub ]
|
||||
exp/decode_tri2a_tgpr_fmllr_utt_eval92/wer:%WER 12.48 [ 704 / 5641, 128 ins, 56 del, 520 sub ]
|
||||
exp/decode_tri2b_tgpr_eval92/wer:%WER 11.31 [ 638 / 5641, 128 ins, 58 del, 452 sub ]
|
||||
exp/decode_tri2b_tgpr_eval93/wer:%WER 16.14 [ 555 / 3439, 59 ins, 89 del, 407 sub ]
|
||||
exp/decode_tri2b_tgpr_fmllr_eval92/wer:%WER 10.25 [ 578 / 5641, 111 ins, 53 del, 414 sub ]
|
||||
exp/decode_tri2b_tgpr_fmllr_eval93/wer:%WER 14.13 [ 486 / 3439, 46 ins, 84 del, 356 sub ]
|
||||
exp/decode_tri2b_tgpr_utt_eval92/wer:%WER 11.26 [ 635 / 5641, 122 ins, 62 del, 451 sub ]
|
||||
exp/decode_tri2b_tgpr_utt_eval93/wer:%WER 15.93 [ 548 / 3439, 56 ins, 91 del, 401 sub ]
|
||||
exp/decode_tri2b_tgpr_utt_fmllr_eval92/wer:%WER 11.19 [ 631 / 5641, 122 ins, 61 del, 448 sub ]
|
||||
exp/decode_tri2b_tgpr_utt_fmllr_eval93/wer:%WER 15.85 [ 545 / 3439, 54 ins, 93 del, 398 sub ]
|
||||
exp/decode_tri2c_tgpr_eval92/wer:%WER 12.71 [ 717 / 5641, 137 ins, 72 del, 508 sub ]
|
||||
exp/decode_tri2c_tgpr_eval93/wer:%WER 17.01 [ 585 / 3439, 61 ins, 85 del, 439 sub ]
|
||||
exp/decode_tri2c_tgpr_utt_eval92/wer:%WER 12.96 [ 731 / 5641, 148 ins, 67 del, 516 sub ]
|
||||
exp/decode_tri2d_tgpr_eval92/wer:%WER 13.03 [ 735 / 5641, 138 ins, 74 del, 523 sub ]
|
||||
exp/decode_tri2d_tgpr_eval93/wer:%WER 19.40 [ 667 / 3439, 48 ins, 130 del, 489 sub ]
|
||||
exp/decode_tri2e_tgpr_eval92/wer:%WER 14.29 [ 806 / 5641, 155 ins, 79 del, 572 sub ]
|
||||
exp/decode_tri2e_tgpr_eval93/wer:%WER 19.08 [ 656 / 3439, 71 ins, 120 del, 465 sub ]
|
||||
exp/decode_tri2f_tgpr_eval92/wer:%WER 12.23 [ 690 / 5641, 138 ins, 57 del, 495 sub ]
|
||||
exp/decode_tri2f_tgpr_eval93/wer:%WER 17.74 [ 610 / 3439, 68 ins, 85 del, 457 sub ]
|
||||
exp/decode_tri2g_tgpr_diag_eval92/wer:%WER 10.65 [ 601 / 5641, 111 ins, 55 del, 435 sub ]
|
||||
exp/decode_tri2g_tgpr_diag_eval93/wer:%WER 16.49 [ 567 / 3439, 77 ins, 72 del, 418 sub ]
|
||||
exp/decode_tri2g_tgpr_eval92/wer:%WER 11.08 [ 625 / 5641, 119 ins, 57 del, 449 sub ]
|
||||
exp/decode_tri2g_tgpr_eval93/wer:%WER 16.40 [ 564 / 3439, 72 ins, 68 del, 424 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_diag_eval92/wer:%WER 11.10 [ 626 / 5641, 119 ins, 60 del, 447 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_diag_eval93/wer:%WER 16.08 [ 553 / 3439, 75 ins, 68 del, 410 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_eval92/wer:%WER 11.19 [ 631 / 5641, 117 ins, 59 del, 455 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_eval93/wer:%WER 16.17 [ 556 / 3439, 76 ins, 67 del, 413 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_vtln_diag_eval92/wer:%WER 10.88 [ 614 / 5641, 115 ins, 58 del, 441 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_vtln_diag_eval93/wer:%WER 17.82 [ 613 / 3439, 73 ins, 99 del, 441 sub ]
|
||||
exp/decode_tri2g_tgpr_vtln_diag_eval92/wer:%WER 10.88 [ 614 / 5641, 117 ins, 59 del, 438 sub ]
|
||||
exp/decode_tri2g_tgpr_vtln_diag_eval93/wer:%WER 15.91 [ 547 / 3439, 73 ins, 68 del, 406 sub ]
|
||||
exp/decode_tri2h_tgpr_eval92/wer:%WER 13.40 [ 756 / 5641, 163 ins, 54 del, 539 sub ]
|
||||
exp/decode_tri2h_tgpr_eval93/wer:%WER 20.24 [ 696 / 3439, 69 ins, 109 del, 518 sub ]
|
||||
exp/decode_tri2i_tgpr_eval92/wer:%WER 12.39 [ 699 / 5641, 130 ins, 72 del, 497 sub ]
|
||||
exp/decode_tri2i_tgpr_eval93/wer:%WER 18.35 [ 631 / 3439, 58 ins, 102 del, 471 sub ]
|
||||
exp/decode_tri2j_tgpr_eval92/wer:%WER 12.82 [ 723 / 5641, 127 ins, 70 del, 526 sub ]
|
||||
exp/decode_tri2j_tgpr_eval93/wer:%WER 18.26 [ 628 / 3439, 59 ins, 99 del, 470 sub ]
|
||||
exp/decode_tri3a_bg_eval92/wer:%WER 11.82 [ 667 / 5641, 132 ins, 63 del, 472 sub ]
|
||||
exp/decode_tri3a_bg_eval93/wer:%WER 15.00 [ 516 / 3439, 62 ins, 75 del, 379 sub ]
|
||||
exp/decode_tri3a_tgpr_dfmllr_eval92/wer:%WER 10.51 [ 593 / 5641, 111 ins, 51 del, 431 sub ]
|
||||
exp/decode_tri3a_tgpr_dfmllr_eval93/wer:%WER 12.68 [ 436 / 3439, 52 ins, 52 del, 332 sub ]
|
||||
exp/decode_tri3a_tgpr_eval92/wer:%WER 10.67 [ 602 / 5641, 131 ins, 43 del, 428 sub ]
|
||||
exp/decode_tri3a_tgpr_eval93/wer:%WER 13.84 [ 476 / 3439, 55 ins, 68 del, 353 sub ]
|
||||
exp/decode_tri3a_tgpr_fmllr_eval92/wer:%WER 9.54 [ 538 / 5641, 114 ins, 47 del, 377 sub ]
|
||||
exp/decode_tri3a_tgpr_fmllr_eval93/wer:%WER 12.13 [ 417 / 3439, 52 ins, 59 del, 306 sub ]
|
||||
exp/decode_tri3a_tgpr_uttdfmllr_eval92/wer:%WER 10.58 [ 597 / 5641, 118 ins, 49 del, 430 sub ]
|
||||
exp/decode_tri3a_tgpr_uttdfmllr_eval93/wer:%WER 13.29 [ 457 / 3439, 49 ins, 57 del, 351 sub ]
|
||||
exp/decode_tri3a_tgpr_uttfmllr_eval92/wer:%WER 10.44 [ 589 / 5641, 122 ins, 47 del, 420 sub ]
|
||||
exp/decode_tri3a_tgpr_uttfmllr_eval93/wer:%WER 13.93 [ 479 / 3439, 56 ins, 69 del, 354 sub ]
|
|
@ -32,6 +32,8 @@ while(<STDIN>) {
|
|||
foreach $w (split (" ",$trans)) {
|
||||
$w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary. .
|
||||
$w =~ s:\\::g; # Remove backslashes. We don't need the quoting.
|
||||
$w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts.
|
||||
$w =~ s:^\.POINT$:POINT:; # Normalization for Nov'93 test transcripts.
|
||||
if($w =~ m:^\[\<\w+\]$: || # E.g. [<door_slam], this means a door slammed in the preceding word. Delete.
|
||||
$w =~ m:^\[\w+\>\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete.
|
||||
$w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon.
|
||||
|
|
|
@ -107,9 +107,10 @@ cp data_prep/train_si284.utt2spk data/train.utt2spk
|
|||
cp data_prep/spk2gender.map data/
|
||||
|
||||
for x in eval_nov92 dev_nov93 eval_nov93; do
|
||||
cp data_prep/$x.spk2utt data/$x.spk2utt
|
||||
cp data_prep/$x.utt2spk data/$x.utt2spk
|
||||
cp data_prep/$x.txt data/$x.txt
|
||||
cp data_prep/$x.spk2utt data/
|
||||
cp data_prep/$x.utt2spk data/
|
||||
cp data_prep/$x.txt data/
|
||||
cp data_prep/${x}_wav.scp data/
|
||||
done
|
||||
|
||||
# Get the right paths on our system by sourcing the following shell file
|
||||
|
@ -194,23 +195,41 @@ steps/train_tri2a.sh || exit 1;
|
|||
scripts/decode.sh exp/decode_tri2a_tgpr_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2a_tgpr_eval93 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a.sh data/eval_nov93.scp )&
|
||||
|
||||
# also doing tri2a with bigram
|
||||
(
|
||||
scripts/mkgraph.sh data/G_bg.fst exp/tri2a/tree exp/tri2a/final.mdl exp/graph_tri2a_bg || exit 1;
|
||||
scripts/decode.sh exp/decode_tri2a_bg_eval92 exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2a_bg_eval93 exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov93.scp
|
||||
)&
|
||||
|
||||
|
||||
( scripts/decode.sh exp/decode_tri2a_tgpr_fmllr_utt_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2a_tgpr_fmllr_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp )&
|
||||
|
||||
steps/train_tri3a.sh || exit 1;
|
||||
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri3a/tree exp/tri3a/final.mdl exp/graph_tri3a_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_tri3a_tgpr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a.sh data/eval_nov92.scp
|
||||
for year in 92 93; do
|
||||
scripts/decode.sh exp/decode_tri3a_tgpr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a.sh data/eval_nov${year}.scp
|
||||
# per-speaker fMLLR
|
||||
scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_fmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_fmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov${year}.scp
|
||||
# per-utterance fMLLR
|
||||
scripts/decode.sh exp/decode_tri3a_tgpr_uttfmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri3a_tgpr_uttfmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov${year}.scp
|
||||
# per-speaker diagonal fMLLR
|
||||
scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_dfmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_dfmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov${year}.scp
|
||||
# per-utterance diagonal fMLLR
|
||||
scripts/decode.sh exp/decode_tri3a_tgpr_uttdfmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri3a_tgpr_uttdfmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov${year}.scp
|
||||
done
|
||||
)&
|
||||
|
||||
# also doing tri3a with bigram
|
||||
(
|
||||
scripts/mkgraph.sh data/G_bg.fst exp/tri3a/tree exp/tri3a/final.mdl exp/graph_tri3a_bg || exit 1;
|
||||
scripts/decode.sh exp/decode_tri3a_bg_eval92 exp/graph_tri3a_bg/HCLG.fst steps/decode_tri3a.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri3a_bg_eval93 exp/graph_tri3a_bg/HCLG.fst steps/decode_tri3a.sh data/eval_nov93.scp
|
||||
)&
|
||||
|
||||
|
||||
# will delete:
|
||||
## scripts/decode_queue_fmllr.sh exp/graph_tri3a_tg_pruned exp/tri3a/final.mdl exp/decode_tri3a_tg_pruned_fmllr &
|
||||
|
||||
|
@ -223,6 +242,12 @@ steps/train_tri2b.sh
|
|||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp
|
||||
|
||||
scripts/decode.sh exp/decode_tri2b_tgpr_utt_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov93.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov93.scp
|
||||
scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov93.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov93.scp
|
||||
|
||||
) &
|
||||
|
||||
# Cepstral Mean Normalization (CMN)
|
||||
|
@ -250,35 +275,47 @@ steps/train_tri2e.sh
|
|||
# Splice+LDA+MLLT
|
||||
steps/train_tri2f.sh
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2f/tree exp/tri2f/final.mdl exp/graph_tri2f_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_tri2f_tgpr_eval92 exp/graph_tri2f_tg_pruned/HCLG.fst steps/decode_tri2f.sh data/eval_nov92.scp )&
|
||||
scripts/decode.sh exp/decode_tri2f_tgpr_eval92 exp/graph_tri2f_tg_pruned/HCLG.fst steps/decode_tri2f.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2f_tgpr_eval93 exp/graph_tri2f_tg_pruned/HCLG.fst steps/decode_tri2f.sh data/eval_nov93.scp
|
||||
)&
|
||||
|
||||
# Linear VTLN (+ regular VTLN)
|
||||
steps/train_tri2g.sh
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2g/tree exp/tri2g/final.mdl exp/graph_tri2g_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_tri2g_tgpr_utt_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2g_tgpr_utt_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --wav exp/decode_tri2g_tgpr_utt_vtln_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov92.scp
|
||||
(
|
||||
scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2g/tree exp/tri2g/final.mdl exp/graph_tri2g_tg_pruned || exit 1;
|
||||
|
||||
scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --wav --per-spk exp/decode_tri2g_tgpr_vtln_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov92.scp
|
||||
for year in 92 93; do
|
||||
scripts/decode.sh exp/decode_tri2g_tgpr_utt_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh exp/decode_tri2g_tgpr_utt_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --wav exp/decode_tri2g_tgpr_utt_vtln_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov${year}.scp
|
||||
|
||||
scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --wav --per-spk exp/decode_tri2g_tgpr_vtln_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov${year}.scp
|
||||
done
|
||||
|
||||
)&
|
||||
|
||||
# Splice+HLDA
|
||||
steps/train_tri2h.sh
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2h/tree exp/tri2h/final.mdl exp/graph_tri2h_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_tri2h_tgpr_eval92 exp/graph_tri2h_tg_pruned/HCLG.fst steps/decode_tri2h.sh data/eval_nov92.scp )&
|
||||
scripts/decode.sh exp/decode_tri2h_tgpr_eval92 exp/graph_tri2h_tg_pruned/HCLG.fst steps/decode_tri2h.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2h_tgpr_eval93 exp/graph_tri2h_tg_pruned/HCLG.fst steps/decode_tri2h.sh data/eval_nov93.scp
|
||||
)&
|
||||
|
||||
# Triple-deltas + HLDA
|
||||
steps/train_tri2i.sh
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2i/tree exp/tri2i/final.mdl exp/graph_tri2i_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_tri2i_tgpr_eval92 exp/graph_tri2i_tg_pruned/HCLG.fst steps/decode_tri2i.sh data/eval_nov92.scp )&
|
||||
scripts/decode.sh exp/decode_tri2i_tgpr_eval92 exp/graph_tri2i_tg_pruned/HCLG.fst steps/decode_tri2i.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2i_tgpr_eval93 exp/graph_tri2i_tg_pruned/HCLG.fst steps/decode_tri2i.sh data/eval_nov93.scp
|
||||
)&
|
||||
|
||||
# Splice + HLDA
|
||||
steps/train_tri2j.sh
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2j/tree exp/tri2j/final.mdl exp/graph_tri2j_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_tri2j_tgpr_eval92 exp/graph_tri2j_tg_pruned/HCLG.fst steps/decode_tri2j.sh data/eval_nov92.scp )&
|
||||
scripts/decode.sh exp/decode_tri2j_tgpr_eval92 exp/graph_tri2j_tg_pruned/HCLG.fst steps/decode_tri2j.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2j_tgpr_eval93 exp/graph_tri2j_tg_pruned/HCLG.fst steps/decode_tri2j.sh data/eval_nov93.scp
|
||||
)&
|
||||
|
||||
|
||||
# LDA+ET
|
||||
|
@ -307,4 +344,5 @@ steps/train_tri2l.sh
|
|||
|
||||
|
||||
# For an example of scoring with sclite, do e.g.:
|
||||
# scripts/score_sclite.sh exp/decode_tri2a_tg_pruned
|
||||
# scripts/score_sclite.sh exp/decode_tri2a_tgpr_eval92 data/eval_nov92.txt
|
||||
# cat exp/decode_tri2a_tgpr_eval92/scoring/hyp.sys
|
||||
|
|
|
@ -14,8 +14,7 @@
|
|||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
. path.sh
|
||||
|
||||
|
||||
reorder=true # Dan-style, make false for Mirko+Lukas's decoder.
|
||||
|
||||
|
@ -55,10 +54,8 @@ loopscale=0.1
|
|||
tscale=1.0
|
||||
|
||||
|
||||
|
||||
|
||||
if [ "$nopath" != "1" ]; then
|
||||
export PATH=$PATH:../src/fstbin/:../src/bin:../openfst-1.2/src/bin/
|
||||
. path.sh
|
||||
fi
|
||||
|
||||
mkdir -p $dir
|
||||
|
|
|
@ -17,29 +17,36 @@
|
|||
|
||||
# Does the sclite version of scoring in decode directories.
|
||||
|
||||
if [ $# != 1 ]; then
|
||||
echo "Usage: scripts/score_sclite.sh <decode-dir>"
|
||||
if [ $# != 2 ]; then
|
||||
echo "Usage: scripts/score_sclite.sh <decode-dir> <ref>"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
sclite=../tools/sctk-2.4.0/bin/sclite
|
||||
sclite=../../../tools/sctk-2.4.0/bin/sclite
|
||||
|
||||
if [ ! -f $sclite ]; then
|
||||
echo "The sclite program is not there. Follow the INSTALL instructions in ../tools";
|
||||
echo "The sclite program is not there. Follow the INSTALL instructions in ../../../tools";
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
dir=$1
|
||||
ref=$2
|
||||
|
||||
if [ ! -f "$ref" ]; then
|
||||
echo "Reference file $ref is not there"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
scoredir=$dir/scoring
|
||||
mkdir $scoredir
|
||||
|
||||
cat $dir/test?*.tra | \
|
||||
cat $dir/*.tra | \
|
||||
scripts/int2sym.pl --ignore-first-field data/words.txt | \
|
||||
sed 's:<s>::' | sed 's:</s>::' | sed 's:<UNK>::g' | \
|
||||
scripts/transcript2hyp.pl > $scoredir/hyp
|
||||
|
||||
cat data/test_trans.txt | scripts/transcript2hyp.pl | sed 's:<NOISE>::g' | \
|
||||
cat $ref | scripts/transcript2hyp.pl | sed 's:<NOISE>::g' | \
|
||||
sed 's:<SPOKEN_NOISE>::g' > $scoredir/ref
|
||||
|
||||
$sclite -r $scoredir/ref trn -h $scoredir/hyp trn -i wsj -o all -o dtl
|
||||
|
|
|
@ -24,8 +24,8 @@
|
|||
|
||||
@section about_what What is Kaldi?
|
||||
|
||||
Kaldi is a toolkit for speech recognition written in C++ and (to be) released
|
||||
under the open source Apache license. Kaldi is intended for use by speech
|
||||
Kaldi is a toolkit for speech recognition written in C++ and licensed under
|
||||
the Apache License v2.0. Kaldi is intended for use by speech
|
||||
recognition researchers. For more detailed history and list of contributors see
|
||||
\ref history.
|
||||
|
||||
|
|
|
@ -74,9 +74,8 @@ namespace kaldi {
|
|||
KALDI_ASSERT(ApproxEqual(delta, objf_change) && "Probable coding error in optimization");
|
||||
\endcode
|
||||
|
||||
If compiled normally asserts will get checked, but not if compiled with NDEBUG
|
||||
(with the current \ref build_setup "build setup", the NDEBUG string will be defined
|
||||
if compiled with make DEBUGLEVEL=0). For inner-loop assertions that use a lot of CPU,
|
||||
If compiled normally, asserts will get checked, but not if compiled with NDEBUG defined.
|
||||
For inner-loop assertions that use a lot of CPU,
|
||||
we use the following pattern:
|
||||
\code
|
||||
#ifdef KALDI_PARANOID
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
\mainpage Kaldi
|
||||
|
||||
(see also Kaldi's <a href="http://sourceforge.net/projects/kaldi/">project page on SourceForge</a>)
|
||||
|
||||
<p>
|
||||
- \ref about
|
||||
- \ref install
|
||||
- \ref build_setup
|
||||
|
|
|
@ -32,7 +32,6 @@ int main(int argc, char *argv[])
|
|||
// construct all the global objects
|
||||
ParseOptions po(usage);
|
||||
MfccOptions mfcc_opts;
|
||||
bool binary = true;
|
||||
bool subtract_mean = false;
|
||||
BaseFloat vtln_warp = 1.0;
|
||||
std::string vtln_map_rspecifier;
|
||||
|
@ -44,7 +43,6 @@ int main(int argc, char *argv[])
|
|||
// Register the options
|
||||
po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]");
|
||||
po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]; not recommended to do it this way. ");
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)");
|
||||
po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)");
|
||||
po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)");
|
||||
|
|
|
@ -32,7 +32,6 @@ int main(int argc, char *argv[])
|
|||
// construct all the global objects
|
||||
ParseOptions po(usage);
|
||||
PlpOptions plp_opts;
|
||||
bool binary = true;
|
||||
bool subtract_mean = false;
|
||||
BaseFloat vtln_warp = 1.0;
|
||||
std::string vtln_map_rspecifier;
|
||||
|
@ -44,7 +43,6 @@ int main(int argc, char *argv[])
|
|||
// Register the options
|
||||
po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]");
|
||||
po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]. ");
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)"); po.Register("vtln-warp", &vtln_warp, "Vtln warp factor");
|
||||
po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)");
|
||||
po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)");
|
||||
|
|
Загрузка…
Ссылка в новой задаче