Script changes and updating RESULTS files.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@45 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
2011-05-18 22:50:00 +00:00 · 2011-05-18 22:50:00 +00:00 · 9d5dc7f9e9
--- a/egs/rm/s1/steps/train_tri2f.sh
+++ b/egs/rm/s1/steps/train_tri2f.sh
@ -110,7 +110,6 @@ while [ $x -lt $numiters ]; do
     compose-transforms --print-args=false $dir/$x.mat.new $cur_lda $dir/$x.mat || exit 1;
     cur_lda=$dir/$x.mat

-
     feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats $cur_lda ark:- ark:-|"
     # Subset of features used to train MLLT transforms.
     featsub="ark:scripts/subset_scp.pl 800 data/train.scp | splice-feats scp:- ark:- | transform-feats $cur_lda ark:- ark:-|"
--- a/egs/wsj/s1/RESULTS
+++ b/egs/wsj/s1/RESULTS
@ -0,0 +1,79 @@
+
+Note on baselines:
+--
+ Baseline 1:
+ "Robust Decision Tree State Tying for Continuous Speech Recognition",
+  Wolfgang Reichl and Wu Chou, IEEE Trans. Speech and Audio Processing (2000),
+ give the following numbers for gender-independent systems:
+
+ Trained on SI-84, tested on Nov'92 (20k open):  bigram 14.4%, trigram 12.8%.
+
+ Trained on SI-284, 20k open, tested on Nov'92 and Nov'93:
+   bigram: Nov'92, 11.9%;  Nov'93, 15.4% [cf. us (tri3a_bg_eval92, 11.82; tri3a_bg_eval93, 15.00)]
+  trigram: Nov'92, 9.8%;  Nov'93, 13.4%
+--
+ Baseline 2:
+ "Large Vocabulary Continuous Speech Recognition using HTK", by P. C. Woodland et al.,
+ ICASSP 1994.
+
+ Table 2: gender-dependent xwrd system, SI-284 training, 20k test:
+ bigram: Nov'92, 11.08%;  Nov'93, 14.45%  [cf. us (tri3a_bg_eval92, 11.82; tri3a_bg_eval93, 15.00)]
+ trigram: Nov'92, 9.46%;  Nov'93, 12.74%
+
+--
+
+exp/decode_mono_tgpr_eval92/wer:%WER 31.38 [ 1770 / 5641, 108 ins, 386 del, 1276 sub ]
+exp/decode_tri1_tgpr_eval92/wer:%WER 13.30 [ 750 / 5641, 133 ins, 74 del, 543 sub ]
+exp/decode_tri2a_bg_eval92/wer:%WER 14.25 [ 804 / 5641, 146 ins, 87 del, 571 sub ]
+exp/decode_tri2a_bg_eval93/wer:%WER 20.97 [ 721 / 3439, 62 ins, 122 del, 537 sub ]
+exp/decode_tri2a_tgpr_eval92/wer:%WER 12.52 [ 706 / 5641, 127 ins, 60 del, 519 sub ]
+exp/decode_tri2a_tgpr_eval93/wer:%WER 18.29 [ 629 / 3439, 47 ins, 104 del, 478 sub ]
+exp/decode_tri2a_tgpr_fmllr_eval92/wer:%WER 11.42 [ 644 / 5641, 116 ins, 60 del, 468 sub ]
+exp/decode_tri2a_tgpr_fmllr_utt_eval92/wer:%WER 12.48 [ 704 / 5641, 128 ins, 56 del, 520 sub ]
+exp/decode_tri2b_tgpr_eval92/wer:%WER 11.31 [ 638 / 5641, 128 ins, 58 del, 452 sub ]
+exp/decode_tri2b_tgpr_eval93/wer:%WER 16.14 [ 555 / 3439, 59 ins, 89 del, 407 sub ]
+exp/decode_tri2b_tgpr_fmllr_eval92/wer:%WER 10.25 [ 578 / 5641, 111 ins, 53 del, 414 sub ]
+exp/decode_tri2b_tgpr_fmllr_eval93/wer:%WER 14.13 [ 486 / 3439, 46 ins, 84 del, 356 sub ]
+exp/decode_tri2b_tgpr_utt_eval92/wer:%WER 11.26 [ 635 / 5641, 122 ins, 62 del, 451 sub ]
+exp/decode_tri2b_tgpr_utt_eval93/wer:%WER 15.93 [ 548 / 3439, 56 ins, 91 del, 401 sub ]
+exp/decode_tri2b_tgpr_utt_fmllr_eval92/wer:%WER 11.19 [ 631 / 5641, 122 ins, 61 del, 448 sub ]
+exp/decode_tri2b_tgpr_utt_fmllr_eval93/wer:%WER 15.85 [ 545 / 3439, 54 ins, 93 del, 398 sub ]
+exp/decode_tri2c_tgpr_eval92/wer:%WER 12.71 [ 717 / 5641, 137 ins, 72 del, 508 sub ]
+exp/decode_tri2c_tgpr_eval93/wer:%WER 17.01 [ 585 / 3439, 61 ins, 85 del, 439 sub ]
+exp/decode_tri2c_tgpr_utt_eval92/wer:%WER 12.96 [ 731 / 5641, 148 ins, 67 del, 516 sub ]
+exp/decode_tri2d_tgpr_eval92/wer:%WER 13.03 [ 735 / 5641, 138 ins, 74 del, 523 sub ]
+exp/decode_tri2d_tgpr_eval93/wer:%WER 19.40 [ 667 / 3439, 48 ins, 130 del, 489 sub ]
+exp/decode_tri2e_tgpr_eval92/wer:%WER 14.29 [ 806 / 5641, 155 ins, 79 del, 572 sub ]
+exp/decode_tri2e_tgpr_eval93/wer:%WER 19.08 [ 656 / 3439, 71 ins, 120 del, 465 sub ]
+exp/decode_tri2f_tgpr_eval92/wer:%WER 12.23 [ 690 / 5641, 138 ins, 57 del, 495 sub ]
+exp/decode_tri2f_tgpr_eval93/wer:%WER 17.74 [ 610 / 3439, 68 ins, 85 del, 457 sub ]
+exp/decode_tri2g_tgpr_diag_eval92/wer:%WER 10.65 [ 601 / 5641, 111 ins, 55 del, 435 sub ]
+exp/decode_tri2g_tgpr_diag_eval93/wer:%WER 16.49 [ 567 / 3439, 77 ins, 72 del, 418 sub ]
+exp/decode_tri2g_tgpr_eval92/wer:%WER 11.08 [ 625 / 5641, 119 ins, 57 del, 449 sub ]
+exp/decode_tri2g_tgpr_eval93/wer:%WER 16.40 [ 564 / 3439, 72 ins, 68 del, 424 sub ]
+exp/decode_tri2g_tgpr_utt_diag_eval92/wer:%WER 11.10 [ 626 / 5641, 119 ins, 60 del, 447 sub ]
+exp/decode_tri2g_tgpr_utt_diag_eval93/wer:%WER 16.08 [ 553 / 3439, 75 ins, 68 del, 410 sub ]
+exp/decode_tri2g_tgpr_utt_eval92/wer:%WER 11.19 [ 631 / 5641, 117 ins, 59 del, 455 sub ]
+exp/decode_tri2g_tgpr_utt_eval93/wer:%WER 16.17 [ 556 / 3439, 76 ins, 67 del, 413 sub ]
+exp/decode_tri2g_tgpr_utt_vtln_diag_eval92/wer:%WER 10.88 [ 614 / 5641, 115 ins, 58 del, 441 sub ]
+exp/decode_tri2g_tgpr_utt_vtln_diag_eval93/wer:%WER 17.82 [ 613 / 3439, 73 ins, 99 del, 441 sub ]
+exp/decode_tri2g_tgpr_vtln_diag_eval92/wer:%WER 10.88 [ 614 / 5641, 117 ins, 59 del, 438 sub ]
+exp/decode_tri2g_tgpr_vtln_diag_eval93/wer:%WER 15.91 [ 547 / 3439, 73 ins, 68 del, 406 sub ]
+exp/decode_tri2h_tgpr_eval92/wer:%WER 13.40 [ 756 / 5641, 163 ins, 54 del, 539 sub ]
+exp/decode_tri2h_tgpr_eval93/wer:%WER 20.24 [ 696 / 3439, 69 ins, 109 del, 518 sub ]
+exp/decode_tri2i_tgpr_eval92/wer:%WER 12.39 [ 699 / 5641, 130 ins, 72 del, 497 sub ]
+exp/decode_tri2i_tgpr_eval93/wer:%WER 18.35 [ 631 / 3439, 58 ins, 102 del, 471 sub ]
+exp/decode_tri2j_tgpr_eval92/wer:%WER 12.82 [ 723 / 5641, 127 ins, 70 del, 526 sub ]
+exp/decode_tri2j_tgpr_eval93/wer:%WER 18.26 [ 628 / 3439, 59 ins, 99 del, 470 sub ]
+exp/decode_tri3a_bg_eval92/wer:%WER 11.82 [ 667 / 5641, 132 ins, 63 del, 472 sub ]
+exp/decode_tri3a_bg_eval93/wer:%WER 15.00 [ 516 / 3439, 62 ins, 75 del, 379 sub ]
+exp/decode_tri3a_tgpr_dfmllr_eval92/wer:%WER 10.51 [ 593 / 5641, 111 ins, 51 del, 431 sub ]
+exp/decode_tri3a_tgpr_dfmllr_eval93/wer:%WER 12.68 [ 436 / 3439, 52 ins, 52 del, 332 sub ]
+exp/decode_tri3a_tgpr_eval92/wer:%WER 10.67 [ 602 / 5641, 131 ins, 43 del, 428 sub ]
+exp/decode_tri3a_tgpr_eval93/wer:%WER 13.84 [ 476 / 3439, 55 ins, 68 del, 353 sub ]
+exp/decode_tri3a_tgpr_fmllr_eval92/wer:%WER 9.54 [ 538 / 5641, 114 ins, 47 del, 377 sub ]
+exp/decode_tri3a_tgpr_fmllr_eval93/wer:%WER 12.13 [ 417 / 3439, 52 ins, 59 del, 306 sub ]
+exp/decode_tri3a_tgpr_uttdfmllr_eval92/wer:%WER 10.58 [ 597 / 5641, 118 ins, 49 del, 430 sub ]
+exp/decode_tri3a_tgpr_uttdfmllr_eval93/wer:%WER 13.29 [ 457 / 3439, 49 ins, 57 del, 351 sub ]
+exp/decode_tri3a_tgpr_uttfmllr_eval92/wer:%WER 10.44 [ 589 / 5641, 122 ins, 47 del, 420 sub ]
+exp/decode_tri3a_tgpr_uttfmllr_eval93/wer:%WER 13.93 [ 479 / 3439, 56 ins, 69 del, 354 sub ]
--- a/egs/wsj/s1/data_prep/normalize_transcript.pl
+++ b/egs/wsj/s1/data_prep/normalize_transcript.pl
@ -32,6 +32,8 @@ while(<STDIN>) {
    foreach $w (split (" ",$trans)) {
        $w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary. .
        $w =~ s:\\::g;      # Remove backslashes.  We don't need the quoting.
+        $w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts.
+        $w =~ s:^\.POINT$:POINT:; # Normalization for Nov'93 test transcripts.
        if($w =~ m:^\[\<\w+\]$:  || # E.g. [<door_slam], this means a door slammed in the preceding word. Delete.
           $w =~ m:^\[\w+\>\]$:  ||  # E.g. [door_slam>], this means a door slammed in the next word.  Delete.
           $w =~ m:\[\w+/\]$: ||  # E.g. [phone_ring/], which indicates the start of this phenomenon.
--- a/egs/wsj/s1/run.sh
+++ b/egs/wsj/s1/run.sh
@ -107,9 +107,10 @@ cp data_prep/train_si284.utt2spk data/train.utt2spk
 cp data_prep/spk2gender.map data/

 for x in eval_nov92 dev_nov93 eval_nov93; do 
-  cp data_prep/$x.spk2utt data/$x.spk2utt
-  cp data_prep/$x.utt2spk data/$x.utt2spk
-  cp data_prep/$x.txt data/$x.txt
+  cp data_prep/$x.spk2utt data/
+  cp data_prep/$x.utt2spk data/
+  cp data_prep/$x.txt data/
+  cp data_prep/${x}_wav.scp data/
 done

 # Get the right paths on our system by sourcing the following shell file
@ -194,23 +195,41 @@ steps/train_tri2a.sh || exit 1;
 scripts/decode.sh exp/decode_tri2a_tgpr_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a.sh data/eval_nov92.scp 
 scripts/decode.sh exp/decode_tri2a_tgpr_eval93 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a.sh data/eval_nov93.scp )&

+# also doing tri2a with bigram
+(
+ scripts/mkgraph.sh data/G_bg.fst exp/tri2a/tree exp/tri2a/final.mdl exp/graph_tri2a_bg || exit 1;
+ scripts/decode.sh exp/decode_tri2a_bg_eval92 exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov92.scp 
+ scripts/decode.sh exp/decode_tri2a_bg_eval93 exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov93.scp 
+)&
+
+
 ( scripts/decode.sh exp/decode_tri2a_tgpr_fmllr_utt_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp 
 scripts/decode.sh --per-spk exp/decode_tri2a_tgpr_fmllr_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp )&

 steps/train_tri3a.sh || exit 1;

 (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri3a/tree exp/tri3a/final.mdl exp/graph_tri3a_tg_pruned || exit 1;
- scripts/decode.sh exp/decode_tri3a_tgpr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a.sh data/eval_nov92.scp 
+for year in 92 93; do
+ scripts/decode.sh exp/decode_tri3a_tgpr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a.sh data/eval_nov${year}.scp 
 # per-speaker fMLLR
-scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_fmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov92.scp
+scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_fmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov${year}.scp
 # per-utterance fMLLR
-scripts/decode.sh exp/decode_tri3a_tgpr_uttfmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov92.scp 
+scripts/decode.sh exp/decode_tri3a_tgpr_uttfmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_fmllr.sh data/eval_nov${year}.scp 
 # per-speaker diagonal fMLLR
-scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_dfmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov92.scp 
+scripts/decode.sh --per-spk exp/decode_tri3a_tgpr_dfmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov${year}.scp 
 # per-utterance diagonal fMLLR
-scripts/decode.sh exp/decode_tri3a_tgpr_uttdfmllr_eval92 exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov92.scp 
+scripts/decode.sh exp/decode_tri3a_tgpr_uttdfmllr_eval${year} exp/graph_tri3a_tg_pruned/HCLG.fst steps/decode_tri3a_diag_fmllr.sh data/eval_nov${year}.scp 
+done
 )&

+# also doing tri3a with bigram
+(
+ scripts/mkgraph.sh data/G_bg.fst exp/tri3a/tree exp/tri3a/final.mdl exp/graph_tri3a_bg || exit 1;
+ scripts/decode.sh exp/decode_tri3a_bg_eval92 exp/graph_tri3a_bg/HCLG.fst steps/decode_tri3a.sh data/eval_nov92.scp 
+ scripts/decode.sh exp/decode_tri3a_bg_eval93 exp/graph_tri3a_bg/HCLG.fst steps/decode_tri3a.sh data/eval_nov93.scp 
+)&
+
+
 # will delete:
 ## scripts/decode_queue_fmllr.sh exp/graph_tri3a_tg_pruned exp/tri3a/final.mdl exp/decode_tri3a_tg_pruned_fmllr &

@ -223,6 +242,12 @@ steps/train_tri2b.sh
 scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov92.scp 
 scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp 
 scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp 
+
+ scripts/decode.sh exp/decode_tri2b_tgpr_utt_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov93.scp 
+ scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov93.scp 
+ scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov93.scp 
+ scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov93.scp 
+
 ) &

 # Cepstral Mean Normalization (CMN)
@ -250,35 +275,47 @@ steps/train_tri2e.sh
 # Splice+LDA+MLLT
 steps/train_tri2f.sh
 (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2f/tree exp/tri2f/final.mdl exp/graph_tri2f_tg_pruned || exit 1;
- scripts/decode.sh exp/decode_tri2f_tgpr_eval92 exp/graph_tri2f_tg_pruned/HCLG.fst steps/decode_tri2f.sh data/eval_nov92.scp  )&
+ scripts/decode.sh exp/decode_tri2f_tgpr_eval92 exp/graph_tri2f_tg_pruned/HCLG.fst steps/decode_tri2f.sh data/eval_nov92.scp  
+ scripts/decode.sh exp/decode_tri2f_tgpr_eval93 exp/graph_tri2f_tg_pruned/HCLG.fst steps/decode_tri2f.sh data/eval_nov93.scp  
+)&

 # Linear VTLN (+ regular VTLN)
 steps/train_tri2g.sh
-(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2g/tree exp/tri2g/final.mdl exp/graph_tri2g_tg_pruned || exit 1;
- scripts/decode.sh exp/decode_tri2g_tgpr_utt_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov92.scp  
- scripts/decode.sh exp/decode_tri2g_tgpr_utt_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov92.scp  
- scripts/decode.sh --wav exp/decode_tri2g_tgpr_utt_vtln_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov92.scp  
+(
+ scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2g/tree exp/tri2g/final.mdl exp/graph_tri2g_tg_pruned || exit 1;

- scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov92.scp  
- scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov92.scp  
- scripts/decode.sh --wav --per-spk exp/decode_tri2g_tgpr_vtln_diag_eval92 exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov92.scp  
+for year in 92 93; do
+ scripts/decode.sh exp/decode_tri2g_tgpr_utt_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov${year}.scp  
+ scripts/decode.sh exp/decode_tri2g_tgpr_utt_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov${year}.scp  
+ scripts/decode.sh --wav exp/decode_tri2g_tgpr_utt_vtln_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov${year}.scp  
+
+ scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g.sh data/eval_nov${year}.scp  
+ scripts/decode.sh --per-spk exp/decode_tri2g_tgpr_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_diag.sh data/eval_nov${year}.scp  
+ scripts/decode.sh --wav --per-spk exp/decode_tri2g_tgpr_vtln_diag_eval${year} exp/graph_tri2g_tg_pruned/HCLG.fst steps/decode_tri2g_vtln_diag.sh data/eval_nov${year}.scp  
+done

 )&

 # Splice+HLDA
 steps/train_tri2h.sh
 (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2h/tree exp/tri2h/final.mdl exp/graph_tri2h_tg_pruned || exit 1;
- scripts/decode.sh exp/decode_tri2h_tgpr_eval92 exp/graph_tri2h_tg_pruned/HCLG.fst steps/decode_tri2h.sh data/eval_nov92.scp  )&
+ scripts/decode.sh exp/decode_tri2h_tgpr_eval92 exp/graph_tri2h_tg_pruned/HCLG.fst steps/decode_tri2h.sh data/eval_nov92.scp  
+ scripts/decode.sh exp/decode_tri2h_tgpr_eval93 exp/graph_tri2h_tg_pruned/HCLG.fst steps/decode_tri2h.sh data/eval_nov93.scp  
+)&

 # Triple-deltas + HLDA
 steps/train_tri2i.sh
 (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2i/tree exp/tri2i/final.mdl exp/graph_tri2i_tg_pruned || exit 1;
- scripts/decode.sh exp/decode_tri2i_tgpr_eval92 exp/graph_tri2i_tg_pruned/HCLG.fst steps/decode_tri2i.sh data/eval_nov92.scp  )&
+ scripts/decode.sh exp/decode_tri2i_tgpr_eval92 exp/graph_tri2i_tg_pruned/HCLG.fst steps/decode_tri2i.sh data/eval_nov92.scp  
+ scripts/decode.sh exp/decode_tri2i_tgpr_eval93 exp/graph_tri2i_tg_pruned/HCLG.fst steps/decode_tri2i.sh data/eval_nov93.scp  
+)&

 # Splice + HLDA
 steps/train_tri2j.sh
 (scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2j/tree exp/tri2j/final.mdl exp/graph_tri2j_tg_pruned || exit 1;
- scripts/decode.sh exp/decode_tri2j_tgpr_eval92 exp/graph_tri2j_tg_pruned/HCLG.fst steps/decode_tri2j.sh data/eval_nov92.scp  )&
+ scripts/decode.sh exp/decode_tri2j_tgpr_eval92 exp/graph_tri2j_tg_pruned/HCLG.fst steps/decode_tri2j.sh data/eval_nov92.scp
+ scripts/decode.sh exp/decode_tri2j_tgpr_eval93 exp/graph_tri2j_tg_pruned/HCLG.fst steps/decode_tri2j.sh data/eval_nov93.scp 
+ )&


 # LDA+ET
@ -307,4 +344,5 @@ steps/train_tri2l.sh


 # For an e.g. of scoring with sclite: do e.g.
-#  scripts/score_sclite.sh exp/decode_tri2a_tg_pruned 
+# scripts/score_sclite.sh exp/decode_tri2a_tgpr_eval92 data/eval_nov92.txt
+# cat exp/decode_tri2a_tgpr_eval92/scoring/hyp.sys
--- a/egs/wsj/s1/scripts/mkgraph.sh
+++ b/egs/wsj/s1/scripts/mkgraph.sh
@ -14,8 +14,7 @@
 # See the Apache 2 License for the specific language governing permissions and
 # limitations under the License.

-
-. path.sh
+                                                                      

 reorder=true # Dan-style, make false for Mirko+Lukas's decoder.

@ -55,10 +54,8 @@ loopscale=0.1
 tscale=1.0


-
-
 if [ "$nopath" != "1" ]; then
-  export PATH=$PATH:../src/fstbin/:../src/bin:../openfst-1.2/src/bin/
+  . path.sh
 fi

 mkdir -p $dir
--- a/egs/wsj/s1/scripts/score_sclite.sh
+++ b/egs/wsj/s1/scripts/score_sclite.sh
@ -17,29 +17,36 @@

 # Does the sclite version of scoring in decode directories.

-if [ $# != 1 ]; then
-   echo "Usage: scripts/score_sclite.sh <decode-dir>"
+if [ $# != 2 ]; then
+   echo "Usage: scripts/score_sclite.sh <decode-dir> <ref>"
   exit 1;
 fi

-sclite=../tools/sctk-2.4.0/bin/sclite
+sclite=../../../tools/sctk-2.4.0/bin/sclite

 if [ ! -f $sclite  ]; then
-   echo "The sclite program is not there.  Follow the INSTALL instructions in ../tools";
+   echo "The sclite program is not there.  Follow the INSTALL instructions in ../../../tools";
   exit 1;
 fi

 dir=$1
+ref=$2
+
+if [ ! -f "$ref" ]; then
+   echo "Reference file $ref is not there"
+   exit 1
+fi
+

 scoredir=$dir/scoring
 mkdir $scoredir

-cat $dir/test?*.tra  | \
+cat $dir/*.tra  | \
  scripts/int2sym.pl --ignore-first-field data/words.txt | \
  sed 's:<s>::' | sed 's:</s>::' | sed 's:<UNK>::g' | \
  scripts/transcript2hyp.pl > $scoredir/hyp

-cat data/test_trans.txt | scripts/transcript2hyp.pl | sed 's:<NOISE>::g' | \
+cat $ref | scripts/transcript2hyp.pl | sed 's:<NOISE>::g' | \
  sed 's:<SPOKEN_NOISE>::g' > $scoredir/ref

 $sclite -r $scoredir/ref trn -h $scoredir/hyp trn -i wsj -o all -o dtl
--- a/src/doc/about.dox
+++ b/src/doc/about.dox
@ -24,8 +24,8 @@

 @section about_what What is Kaldi?

-  Kaldi is a toolkit for speech recognition written in C++ and (to be) released
-  under the open source Apache license.  Kaldi is intended for use by speech
+  Kaldi is a toolkit for speech recognition written in C++ and licensed under
+  the Apache License v2.0.  Kaldi is intended for use by speech
  recognition researchers.  For more detailed history and list of contributors see
  \ref history.

--- a/src/doc/error.dox
+++ b/src/doc/error.dox
@ -74,9 +74,8 @@ namespace kaldi {
 KALDI_ASSERT(ApproxEqual(delta, objf_change) && "Probable coding error in optimization");
 \endcode

- If compiled normally asserts will get checked, but not if compiled with NDEBUG
- (with the current \ref build_setup "build setup", the NDEBUG string will be defined
- if compiled with make DEBUGLEVEL=0).  For inner-loop assertions that use a lot of CPU,
+ If compiled normally, asserts will get checked, but not if compiled with NDEBUG defined.
+ For inner-loop assertions that use a lot of CPU,
 we use the following pattern:
 \code
 #ifdef KALDI_PARANOID
--- a/src/doc/mainpage.dox
+++ b/src/doc/mainpage.dox
@ -31,7 +31,7 @@
  \mainpage Kaldi

   (see also Kaldi's <a href=http://sourceforge.net/projects/kaldi/> project page on Sourceforge </a>)
-
+   <p>
   - \ref about 
   - \ref install 
   - \ref build_setup 
--- a/src/featbin/compute-mfcc-feats.cc
+++ b/src/featbin/compute-mfcc-feats.cc
@ -32,7 +32,6 @@ int main(int argc, char *argv[])
    // construct all the global objects
    ParseOptions po(usage);
    MfccOptions mfcc_opts;
-    bool binary = true;
    bool subtract_mean = false;
    BaseFloat vtln_warp = 1.0;
    std::string vtln_map_rspecifier;
@ -44,7 +43,6 @@ int main(int argc, char *argv[])
    // Register the options
    po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]");
    po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]; not recommended to do it this way. ");
-    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)");
    po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)");
    po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)");
--- a/src/featbin/compute-plp-feats.cc
+++ b/src/featbin/compute-plp-feats.cc
@ -32,7 +32,6 @@ int main(int argc, char *argv[])
    // construct all the global objects
    ParseOptions po(usage);
    PlpOptions plp_opts;
-    bool binary = true;
    bool subtract_mean = false;
    BaseFloat vtln_warp = 1.0;
    std::string vtln_map_rspecifier;
@ -44,7 +43,6 @@ int main(int argc, char *argv[])
    // Register the options
    po.Register("output-format", &output_format, "Format of the output files [kaldi, htk]");
    po.Register("subtract-mean", &subtract_mean, "Subtract mean of each feature file [CMS]. ");
-    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)");    po.Register("vtln-warp", &vtln_warp, "Vtln warp factor");
    po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)");
    po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)");