- updating pure hybrid setup example

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@280 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
2011-08-15 15:38:35 +00:00 · 2011-08-15 15:38:35 +00:00 · 95fa71c649
--- a/egs/rm/s2/run.sh
+++ b/egs/rm/s2/run.sh
@@ -79,9 +79,9 @@ steps/decode_nnet_mono_pdf.sh &
 # now, we will train triphone GMM-HMM system to get context-dependent training labels
 # 500 pdfs
 time steps/train_tri1.sh
-(steps/decode_tri1.sh ; steps/decode_tri1_fmllr.sh; steps/decode_tri1_regtree_fmllr.sh ) &
+steps/decode_tri1.sh &
 time steps/train_tri2a.sh
-(steps/decode_tri2a.sh ; steps/decode_tri2a_fmllr.sh; steps/decode_tri2a_fmllr_utt.sh ) &
+steps/decode_tri2a.sh &

 # train MLP with context-dependent pdf targets
 # 1-frame of MFCC_D_A_0, per-utternace CMN, global CVN, 
--- a/egs/rm/s2/scripts/gen_mlp_init.py
+++ b/egs/rm/s2/scripts/gen_mlp_init.py
@@ -18,12 +18,18 @@ parser.add_option('--gauss', dest='gauss', help='use gaussian noise for weights'
 parser.add_option('--negbias', dest='negbias', help='use uniform [-4.1,-3.9] for bias (defaultall 0.0)', action='store_true', default=False)
 parser.add_option('--inputscale', dest='inputscale', help='scale the weights by 3/sqrt(Ninputs)', action='store_true', default=False)
 parser.add_option('--linBNdim', dest='linBNdim', help='dim of linear bottleneck (sigmoids will be omitted, bias will be zero)',default=0)
+parser.add_option('--seed', dest='seedval', help='seed for random generator',default=0)
 (options, args) = parser.parse_args()

 if(options.dim == None):
    parser.print_help()
    sys.exit(1)

+#seeding
+seedval=int(options.seedval)
+if(seedval != 0):
+    random.seed(seedval)
+

 dimStrL = options.dim.split(':')

--- a/egs/rm/s2/scripts/shuffle_list.pl
+++ b/egs/rm/s2/scripts/shuffle_list.pl
@@ -16,8 +16,14 @@
 # limitations under the License.


+#seeding is optional...
+if($#ARGV==0) {
+    srand($ARGV[0]);
+}
+
+
 # This script shuffles lines of a list. 
 # The list is read from stdin and written to stdout. 
-@X = <>;
+@X = <STDIN>;
 @X = sort { rand() <=> rand() } @X;
 print @X; 
--- a/egs/rm/s2/steps/decode_mono.sh
+++ b/egs/rm/s2/steps/decode_mono.sh
@@ -42,5 +42,5 @@ wait

 grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
-   > $dir/wer
+   | tee $dir/wer

--- a/egs/rm/s2/steps/decode_nnet_mono_pdf.sh
+++ b/egs/rm/s2/steps/decode_nnet_mono_pdf.sh
@@ -57,5 +57,5 @@ wait

 grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
-   > $dir/wer
+   | tee $dir/wer

--- a/egs/rm/s2/steps/decode_nnet_mono_trans.sh
+++ b/egs/rm/s2/steps/decode_nnet_mono_trans.sh
@@ -55,5 +55,5 @@ wait

 grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
-   > $dir/wer
+   | tee $dir/wer

--- a/egs/rm/s2/steps/decode_nnet_tri2a_s1a.sh
+++ b/egs/rm/s2/steps/decode_nnet_tri2a_s1a.sh
@@ -65,5 +65,5 @@ wait

 grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
-   > $dir/wer
+   | tee $dir/wer

--- a/egs/rm/s2/steps/decode_nnet_tri2a_s1b.sh
+++ b/egs/rm/s2/steps/decode_nnet_tri2a_s1b.sh
@@ -66,5 +66,5 @@ wait

 grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
-   > $dir/wer
+   | tee $dir/wer

--- a/egs/rm/s2/steps/decode_nnet_tri2a_s2.sh
+++ b/egs/rm/s2/steps/decode_nnet_tri2a_s2.sh
@@ -69,5 +69,5 @@ wait

 grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
-   > $dir/wer
+   | tee $dir/wer

--- a/egs/rm/s2/steps/decode_nnet_tri2a_s3.sh
+++ b/egs/rm/s2/steps/decode_nnet_tri2a_s3.sh
@@ -68,5 +68,5 @@ wait

 grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
-   > $dir/wer
+   | tee $dir/wer

--- a/egs/rm/s2/steps/decode_tri1.sh
+++ b/egs/rm/s2/steps/decode_tri1.sh
@@ -42,4 +42,4 @@ wait

 grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
-   > $dir/wer
+   | tee $dir/wer
--- a/egs/rm/s2/steps/decode_tri1_fmllr.sh
+++ b/egs/rm/s2/steps/decode_tri1_fmllr.sh
@@ -1,64 +0,0 @@
-#!/bin/bash
-
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# deocde_tri_fmllr.sh is as decode_tri.sh but estimating fMLLR in test,
-# per speaker.  There is no SAT.
-# To be run from ..
-
-if [ -f path.sh ]; then . path.sh; fi
-srcdir=exp/decode_tri1
-dir=exp/decode_tri1_fmllr
-mkdir -p $dir
-model=exp/tri1/final.mdl
-tree=exp/tri1/tree
-graphdir=exp/graph_tri1
-silphones=`cat data/silphones.csl`
-
-mincount=500 # mincount before we estimate a transform.
-
-scripts/mkgraph.sh $tree $model $graphdir
-
-for test in mar87 oct87 feb89 oct89 feb91 sep92; do
- (
-  # Comment the two lines below to make this per-utterance.
-  spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
-  utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
-
-  sifeats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
-
-  ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
-    weight-silence-post 0.01 $silphones $model ark:- ark:- | \
-    gmm-est-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model \
-     "$sifeats" ark,o:- ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
-
-  feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
-
-  gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali  2> $dir/decode_${test}.log
-
-  # the ,p option lets it score partial output without dying..
-
-  scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
-    compute-wer --mode=present ark:-  ark,p:$dir/test_${test}.tra > $dir/wer_${test}
- ) &
-done
-
-wait
-
-grep WER $dir/wer_* | \
-  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
-   > $dir/wer
-
--- a/egs/rm/s2/steps/decode_tri1_regtree_fmllr.sh
+++ b/egs/rm/s2/steps/decode_tri1_regtree_fmllr.sh
@@ -1,68 +0,0 @@
-#!/bin/bash
-
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# deocde_tri_regtree_fmllr.sh is as ../decode_tri.sh but estimating fMLLR in test,
-# per speaker.  There is no SAT.  Use a regression-tree with top-level speech/sil
-# split (no silence weighting).
-
-if [ -f path.sh ]; then . path.sh; fi
-srcdir=exp/decode_tri1
-dir=exp/decode_tri1_regtree_fmllr
-mkdir -p $dir
-model=exp/tri1/final.mdl
-occs=exp/tri1/final.occs
-tree=exp/tri1/tree
-graphdir=exp/graph_tri1
-silphones=`cat data/silphones.csl`
-
-regtree=$dir/regtree
-maxleaves=8 # max # of regression-tree leaves.
-mincount=5000 # mincount before we add new transform.
-gmm-make-regtree --sil-phones=$silphones --state-occs=$occs --max-leaves=$maxleaves $model $regtree 2>$dir/make_regtree.out
-
-scripts/mkgraph.sh $tree $model $graphdir
-
-for test in mar87 oct87 feb89 oct89 feb91 sep92; do
- (
-  # Comment the two lines below to make this per-utterance.
-  spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
-  utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
-
-  # To deweight silence, would add the line
-  #   weight-silence-post 0.0 $silphones $model ark:- ark:- | \
-  # after the line with ali-to-post
-  # This is useful if we don't treat silence specially when building regression tree.
-
-  feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
-  ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
-    gmm-est-regtree-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model "$feats" ark:- $regtree ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
-
-  gmm-decode-faster-regtree-fmllr $utt2spk_opt --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst $regtree "$feats" ark:$dir/${test}.fmllr ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali  2> $dir/decode_${test}.log
-
-  # the ,p option lets it score partial output without dying..
-
-  scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
-    compute-wer --mode=present ark:-  ark,p:$dir/test_${test}.tra > $dir/wer_${test}
- ) &
-done
-
-wait
-
-grep WER $dir/wer_* | \
-  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
-   > $dir/wer
-
--- a/egs/rm/s2/steps/decode_tri2a.sh
+++ b/egs/rm/s2/steps/decode_tri2a.sh
@@ -42,4 +42,4 @@ wait

 grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
-   > $dir/wer
+   | tee $dir/wer
--- a/egs/rm/s2/steps/decode_tri2a_fmllr.sh
+++ b/egs/rm/s2/steps/decode_tri2a_fmllr.sh
@@ -1,66 +0,0 @@
-#!/bin/bash
-
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# deocde_tri_fmllr.sh is as decode_tri.sh but estimating fMLLR in test,
-# per speaker.  There is no SAT.
-# To be run from ..
-
-if [ -f path.sh ]; then . path.sh; fi
-srcdir=exp/decode_tri2a
-dir=exp/decode_tri2a_fmllr
-mkdir -p $dir
-model=exp/tri2a/final.mdl
-tree=exp/tri2a/tree
-graphdir=exp/graph_tri2a
-silphones=`cat data/silphones.csl`
-
-mincount=500 # mincount before we estimate a transform.
-
-scripts/mkgraph.sh $tree $model $graphdir
-
-for test in mar87 oct87 feb89 oct89 feb91 sep92; do
- (
-  # Comment the two lines below to make this per-utterance.
-  # This would only work if $srcdir was also per-utterance [otherwise
-  # you'd have to mess with the script a bit].
-  spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
-  utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
-
-  sifeats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
-
-  ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
-    weight-silence-post 0.01 $silphones $model ark:- ark:- | \
-    gmm-est-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model \
-     "$sifeats" ark,o:- ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
-
-  feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
-
-  gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali  2> $dir/decode_${test}.log
-
-  # the ,p option lets it score partial output without dying..
-
-  scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
-    compute-wer --mode=present ark:-  ark,p:$dir/test_${test}.tra > $dir/wer_${test}
- ) &
-done
-
-wait
-
-grep WER $dir/wer_* | \
-  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
-   > $dir/wer
-
--- a/egs/rm/s2/steps/decode_tri2a_fmllr_utt.sh
+++ b/egs/rm/s2/steps/decode_tri2a_fmllr_utt.sh
@@ -1,66 +0,0 @@
-#!/bin/bash
-
-# Copyright 2010-2011 Microsoft Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-# deocde_tri_fmllr.sh is as decode_tri.sh but estimating fMLLR in test,
-# per speaker.  There is no SAT.
-# To be run from ..
-
-if [ -f path.sh ]; then . path.sh; fi
-srcdir=exp/decode_tri2a
-dir=exp/decode_tri2a_fmllr_utt
-mkdir -p $dir
-model=exp/tri2a/final.mdl
-tree=exp/tri2a/tree
-graphdir=exp/graph_tri2a
-silphones=`cat data/silphones.csl`
-
-mincount=500 # mincount before we estimate a transform.
-
-scripts/mkgraph.sh $tree $model $graphdir
-
-for test in mar87 oct87 feb89 oct89 feb91 sep92; do
- (
-  # Comment the two lines below to make this per-utterance.
-  # This would only work if $srcdir was also per-utterance [otherwise
-  # you'd have to mess with the script a bit].
-  #spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
-  #utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
-
-  sifeats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
-
-  ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
-    weight-silence-post 0.01 $silphones $model ark:- ark:- | \
-    gmm-est-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model \
-     "$sifeats" ark,o:- ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
-
-  feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
-
-  gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali  2> $dir/decode_${test}.log
-
-  # the ,p option lets it score partial output without dying..
-
-  scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
-    compute-wer --mode=present ark:-  ark,p:$dir/test_${test}.tra > $dir/wer_${test}
- ) &
-done
-
-wait
-
-grep WER $dir/wer_* | \
-  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
-   > $dir/wer
-
--- a/egs/rm/s2/steps/train_nnet_tri2a_s1.sh
+++ b/egs/rm/s2/steps/train_nnet_tri2a_s1.sh
@@ -9,7 +9,7 @@ mkdir -p $dir/{log,nnet}


 ###### SELECT FEATURES ######
-cat data/train.scp | scripts/shuffle_list.pl > $dir/train.scp
+cat data/train.scp | scripts/shuffle_list.pl ${seed:-666} > $dir/train.scp
 head -n 3591 $dir/train.scp > $dir/train.scp.tr
 tail -n 399 $dir/train.scp > $dir/train.scp.cv
 feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
@@ -50,7 +50,7 @@ feats_cv="$feats_cv apply-cmvn --print-args=false --norm-vars=true $cvn ark:- ar
 ###### INITIALIZE THE NNET ######
 mlp_init=$dir/nnet.init
 num_tgt=$(grep NUMPDFS $dir_ali/final.mdl | awk '{ print $4 }')
-scripts/gen_mlp_init.py --dim=39:1024:${num_tgt} --gauss --negbias > $mlp_init
+scripts/gen_mlp_init.py --dim=39:1024:${num_tgt} --gauss --negbias --seed=666 > $mlp_init



--- a/egs/rm/s2/steps/train_nnet_tri2a_s2.sh
+++ b/egs/rm/s2/steps/train_nnet_tri2a_s2.sh
@@ -9,7 +9,7 @@ mkdir -p $dir/{log,nnet}


 ###### SELECT FEATURES ######
-cat data/train.scp | scripts/shuffle_list.pl > $dir/train.scp
+cat data/train.scp | scripts/shuffle_list.pl ${seed:-666} > $dir/train.scp
 head -n 3591 $dir/train.scp > $dir/train.scp.tr
 tail -n 399 $dir/train.scp > $dir/train.scp.cv
 feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
@@ -54,7 +54,7 @@ feats_cv="$feats_cv splice-feats --print-args=false --left-context=5 --right-con
 ###### INITIALIZE THE NNET ######
 mlp_init=$dir/nnet.init
 num_tgt=$(grep NUMPDFS $dir_ali/final.mdl | awk '{ print $4 }')
-scripts/gen_mlp_init.py --dim=429:568:${num_tgt} --gauss --negbias > $mlp_init
+scripts/gen_mlp_init.py --dim=429:568:${num_tgt} --gauss --negbias --seed=666 > $mlp_init



--- a/egs/rm/s2/steps/train_nnet_tri2a_s3.sh
+++ b/egs/rm/s2/steps/train_nnet_tri2a_s3.sh
@@ -9,7 +9,7 @@ mkdir -p $dir/{log,nnet}


 ###### SELECT FEATURES ######
-cat data/train.scp | scripts/shuffle_list.pl > $dir/train.scp
+cat data/train.scp | scripts/shuffle_list.pl ${seed:-666} > $dir/train.scp
 head -n 3591 $dir/train.scp > $dir/train.scp.tr
 tail -n 399 $dir/train.scp > $dir/train.scp.cv
 feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
@@ -46,7 +46,7 @@ feats_cv="$feats_cv splice-feats --print-args=false --left-context=5 --right-con
 ###### INITIALIZE THE NNET ######
 mlp_init=$dir/nnet.init
 num_tgt=$(grep NUMPDFS $dir_ali/final.mdl | awk '{ print $4 }')
-scripts/gen_mlp_init.py --dim=429:577:${num_tgt} --gauss --negbias > $mlp_init
+scripts/gen_mlp_init.py --dim=429:577:${num_tgt} --gauss --negbias --seed=666 > $mlp_init