- updating pure hybrid setup example

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@280 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Karel Vesely 2011-08-15 15:38:35 +00:00
Родитель 8fd0bc17cc
Коммит 95fa71c649
19 изменённых файлов: 30 добавлений и 282 удалений

Просмотреть файл

@ -79,9 +79,9 @@ steps/decode_nnet_mono_pdf.sh &
# now, we will train triphone GMM-HMM system to get context-dependent training labels
# 500 pdfs
time steps/train_tri1.sh
(steps/decode_tri1.sh ; steps/decode_tri1_fmllr.sh; steps/decode_tri1_regtree_fmllr.sh ) &
steps/decode_tri1.sh &
time steps/train_tri2a.sh
(steps/decode_tri2a.sh ; steps/decode_tri2a_fmllr.sh; steps/decode_tri2a_fmllr_utt.sh ) &
steps/decode_tri2a.sh &
# train MLP with context-dependent pdf targets
# 1-frame of MFCC_D_A_0, per-utterance CMN, global CVN,

Просмотреть файл

@ -18,12 +18,18 @@ parser.add_option('--gauss', dest='gauss', help='use gaussian noise for weights'
parser.add_option('--negbias', dest='negbias', help='use uniform [-4.1,-3.9] for bias (defaultall 0.0)', action='store_true', default=False)
parser.add_option('--inputscale', dest='inputscale', help='scale the weights by 3/sqrt(Ninputs)', action='store_true', default=False)
parser.add_option('--linBNdim', dest='linBNdim', help='dim of linear bottleneck (sigmoids will be omitted, bias will be zero)',default=0)
parser.add_option('--seed', dest='seedval', help='seed for random generator',default=0)
(options, args) = parser.parse_args()
if(options.dim == None):
parser.print_help()
sys.exit(1)
#seeding
seedval=int(options.seedval)
if(seedval != 0):
random.seed(seedval)
dimStrL = options.dim.split(':')

Просмотреть файл

@ -16,8 +16,14 @@
# limitations under the License.
#seeding is optional...
if($#ARGV==0) {
srand($ARGV[0]);
}
# This script shuffles lines of a list.
# The list is read from stdin and written to stdout.
@X = <>;
@X = <STDIN>;
@X = sort { rand() <=> rand() } @X;
print @X;

Просмотреть файл

@ -42,5 +42,5 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
> $dir/wer
| tee $dir/wer

Просмотреть файл

@ -57,5 +57,5 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
> $dir/wer
| tee $dir/wer

Просмотреть файл

@ -55,5 +55,5 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
> $dir/wer
| tee $dir/wer

Просмотреть файл

@ -65,5 +65,5 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
> $dir/wer
| tee $dir/wer

Просмотреть файл

@ -66,5 +66,5 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
> $dir/wer
| tee $dir/wer

Просмотреть файл

@ -69,5 +69,5 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
> $dir/wer
| tee $dir/wer

Просмотреть файл

@ -68,5 +68,5 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
> $dir/wer
| tee $dir/wer

Просмотреть файл

@ -42,4 +42,4 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer
| tee $dir/wer

Просмотреть файл

@ -1,64 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# decode_tri_fmllr.sh is as decode_tri.sh but estimating fMLLR in test,
# per speaker. There is no SAT.
# To be run from ..
#
# Overview: for each test set, reads the alignments of an earlier decoding
# pass from $srcdir, estimates fMLLR transforms from them, decodes again with
# the transformed features, and scores the output. Per-set WERs go to
# $dir/wer_<test>; the averaged summary goes to $dir/wer.

# Pick up environment settings from path.sh when it exists in the current dir.
if [ -f path.sh ]; then . path.sh; fi
# srcdir: where the first-pass alignments (test_<test>.ali) are read from.
srcdir=exp/decode_tri1
# dir: where all outputs of this script (transforms, logs, results) are written.
dir=exp/decode_tri1_fmllr
mkdir -p $dir
model=exp/tri1/final.mdl
tree=exp/tri1/tree
graphdir=exp/graph_tri1
# Contents of data/silphones.csl, passed to weight-silence-post below.
silphones=`cat data/silphones.csl`
mincount=500 # mincount before we estimate a transform.
# NOTE(review): presumably (re)builds the decoding graph in $graphdir;
# $graphdir/HCLG.fst is read by the decoder below -- confirm against mkgraph.sh.
scripts/mkgraph.sh $tree $model $graphdir
# Each test set is processed in its own background subshell; the `wait`
# after the loop blocks until all of them have finished.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
(
# Comment the two lines below to make this per-utterance.
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
# Untransformed features: the test scp piped through add-deltas.
sifeats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
# Transform estimation: first-pass alignments -> ali-to-post ->
# weight-silence-post (0.01, $silphones) -> gmm-est-fmllr, which writes
# $dir/<test>.fmllr; its stderr goes to $dir/fmllr_<test>.log.
ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
gmm-est-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model \
"$sifeats" ark,o:- ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
# Decoding features: same as $sifeats, additionally piped through
# transform-feats with the transforms estimated above.
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
# Second decoding pass; writes test_<test>.tra and test_<test>.ali into $dir,
# with stderr in $dir/decode_<test>.log.
gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
# Scoring: the reference transcript is passed through sym2int.pl with
# data/words.txt and compared against the decoded .tra by compute-wer.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
) &
done
wait
# Summary: sum awk fields $4 and $6 over every line containing "WER" in the
# per-set result files and write the overall percentage to $dir/wer.
# NOTE(review): if no line matches, d stays 0 and the division in END fails.
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer

Просмотреть файл

@ -1,68 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# decode_tri_regtree_fmllr.sh is as ../decode_tri.sh but estimating fMLLR in test,
# per speaker. There is no SAT. Use a regression-tree with top-level speech/sil
# split (no silence weighting).
#
# Overview: builds a regression tree from the model, then for each test set
# estimates regression-tree fMLLR transforms from the first-pass alignments in
# $srcdir, decodes with them, and scores the output. Per-set WERs go to
# $dir/wer_<test>; the averaged summary goes to $dir/wer.

# Pick up environment settings from path.sh when it exists in the current dir.
if [ -f path.sh ]; then . path.sh; fi
# srcdir: where the first-pass alignments (test_<test>.ali) are read from.
srcdir=exp/decode_tri1
# dir: where all outputs of this script (regtree, transforms, logs, results) go.
dir=exp/decode_tri1_regtree_fmllr
mkdir -p $dir
model=exp/tri1/final.mdl
occs=exp/tri1/final.occs
tree=exp/tri1/tree
graphdir=exp/graph_tri1
# Contents of data/silphones.csl, passed to gmm-make-regtree as --sil-phones.
silphones=`cat data/silphones.csl`
regtree=$dir/regtree
maxleaves=8 # max # of regression-tree leaves.
mincount=5000 # mincount before we add new transform.
# Build the regression tree once, up front; it is shared by all test sets.
# stderr of the tool is captured in $dir/make_regtree.out.
gmm-make-regtree --sil-phones=$silphones --state-occs=$occs --max-leaves=$maxleaves $model $regtree 2>$dir/make_regtree.out
# NOTE(review): presumably (re)builds the decoding graph in $graphdir;
# $graphdir/HCLG.fst is read by the decoder below -- confirm against mkgraph.sh.
scripts/mkgraph.sh $tree $model $graphdir
# Each test set is processed in its own background subshell; the `wait`
# after the loop blocks until all of them have finished.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
(
# Comment the two lines below to make this per-utterance.
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
# To deweight silence, would add the line
# weight-silence-post 0.0 $silphones $model ark:- ark:- | \
# after the line with ali-to-post
# This is useful if we don't treat silence specially when building regression tree.
# Features: the test scp piped through add-deltas. The same (untransformed)
# features are used for estimation and decoding; the decoder below is given
# the transforms and the regression tree as separate arguments.
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
# Transform estimation: first-pass alignments -> ali-to-post ->
# gmm-est-regtree-fmllr, which writes $dir/<test>.fmllr; stderr goes to
# $dir/fmllr_<test>.log.
ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
gmm-est-regtree-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model "$feats" ark:- $regtree ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
# Second decoding pass; writes test_<test>.tra and test_<test>.ali into $dir,
# with stderr in $dir/decode_<test>.log.
gmm-decode-faster-regtree-fmllr $utt2spk_opt --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst $regtree "$feats" ark:$dir/${test}.fmllr ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
# Scoring: the reference transcript is passed through sym2int.pl with
# data/words.txt and compared against the decoded .tra by compute-wer.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
) &
done
wait
# Summary: sum awk fields $4 and $6 over every line containing "WER" in the
# per-set result files and write the overall percentage to $dir/wer.
# NOTE(review): if no line matches, d stays 0 and the division in END fails.
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer

Просмотреть файл

@ -42,4 +42,4 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer
| tee $dir/wer

Просмотреть файл

@ -1,66 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# decode_tri_fmllr.sh is as decode_tri.sh but estimating fMLLR in test,
# per speaker. There is no SAT.
# To be run from ..
#
# Overview: for each test set, reads the alignments of an earlier decoding
# pass from $srcdir, estimates fMLLR transforms from them, decodes again with
# the transformed features, and scores the output. Per-set WERs go to
# $dir/wer_<test>; the averaged summary goes to $dir/wer.

# Pick up environment settings from path.sh when it exists in the current dir.
if [ -f path.sh ]; then . path.sh; fi
# srcdir: where the first-pass alignments (test_<test>.ali) are read from.
srcdir=exp/decode_tri2a
# dir: where all outputs of this script (transforms, logs, results) are written.
dir=exp/decode_tri2a_fmllr
mkdir -p $dir
model=exp/tri2a/final.mdl
tree=exp/tri2a/tree
graphdir=exp/graph_tri2a
# Contents of data/silphones.csl, passed to weight-silence-post below.
silphones=`cat data/silphones.csl`
mincount=500 # mincount before we estimate a transform.
# NOTE(review): presumably (re)builds the decoding graph in $graphdir;
# $graphdir/HCLG.fst is read by the decoder below -- confirm against mkgraph.sh.
scripts/mkgraph.sh $tree $model $graphdir
# Each test set is processed in its own background subshell; the `wait`
# after the loop blocks until all of them have finished.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
(
# Comment the two lines below to make this per-utterance.
# This would only work if $srcdir was also per-utterance [otherwise
# you'd have to mess with the script a bit].
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
# Untransformed features: the test scp piped through add-deltas.
sifeats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
# Transform estimation: first-pass alignments -> ali-to-post ->
# weight-silence-post (0.01, $silphones) -> gmm-est-fmllr, which writes
# $dir/<test>.fmllr; its stderr goes to $dir/fmllr_<test>.log.
ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
gmm-est-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model \
"$sifeats" ark,o:- ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
# Decoding features: same as $sifeats, additionally piped through
# transform-feats with the transforms estimated above.
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
# Second decoding pass; writes test_<test>.tra and test_<test>.ali into $dir,
# with stderr in $dir/decode_<test>.log.
gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
# Scoring: the reference transcript is passed through sym2int.pl with
# data/words.txt and compared against the decoded .tra by compute-wer.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
) &
done
wait
# Summary: sum awk fields $4 and $6 over every line containing "WER" in the
# per-set result files and write the overall percentage to $dir/wer.
# NOTE(review): if no line matches, d stays 0 and the division in END fails.
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer

Просмотреть файл

@ -1,66 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# decode_tri_fmllr.sh is as decode_tri.sh but estimating fMLLR in test.
# In this variant the speaker-map options inside the loop are commented out,
# which (per the note there) makes the estimation per-utterance rather than
# per-speaker. There is no SAT.
# To be run from ..
#
# Overview: for each test set, reads the alignments of an earlier decoding
# pass from $srcdir, estimates fMLLR transforms from them, decodes again with
# the transformed features, and scores the output. Per-set WERs go to
# $dir/wer_<test>; the averaged summary goes to $dir/wer.

# Pick up environment settings from path.sh when it exists in the current dir.
if [ -f path.sh ]; then . path.sh; fi
# srcdir: where the first-pass alignments (test_<test>.ali) are read from.
srcdir=exp/decode_tri2a
# dir: where all outputs of this script (transforms, logs, results) are written.
dir=exp/decode_tri2a_fmllr_utt
mkdir -p $dir
model=exp/tri2a/final.mdl
tree=exp/tri2a/tree
graphdir=exp/graph_tri2a
# Contents of data/silphones.csl, passed to weight-silence-post below.
silphones=`cat data/silphones.csl`
mincount=500 # mincount before we estimate a transform.
# NOTE(review): presumably (re)builds the decoding graph in $graphdir;
# $graphdir/HCLG.fst is read by the decoder below -- confirm against mkgraph.sh.
scripts/mkgraph.sh $tree $model $graphdir
# Each test set is processed in its own background subshell; the `wait`
# after the loop blocks until all of them have finished.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
(
# The two assignments below are commented out to make this per-utterance.
# This would only work if $srcdir was also per-utterance [otherwise
# you'd have to mess with the script a bit].
# With them disabled, $spk2utt_opt and $utt2spk_opt are not set in this
# script, so the unquoted expansions further down contribute no argument
# (unless the variables are inherited from the environment or path.sh).
#spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
#utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
# Untransformed features: the test scp piped through add-deltas.
sifeats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
# Transform estimation: first-pass alignments -> ali-to-post ->
# weight-silence-post (0.01, $silphones) -> gmm-est-fmllr, which writes
# $dir/<test>.fmllr; its stderr goes to $dir/fmllr_<test>.log.
ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
gmm-est-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model \
"$sifeats" ark,o:- ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
# Decoding features: same as $sifeats, additionally piped through
# transform-feats with the transforms estimated above.
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
# Second decoding pass; writes test_<test>.tra and test_<test>.ali into $dir,
# with stderr in $dir/decode_<test>.log.
gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
# Scoring: the reference transcript is passed through sym2int.pl with
# data/words.txt and compared against the decoded .tra by compute-wer.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
) &
done
wait
# Summary: sum awk fields $4 and $6 over every line containing "WER" in the
# per-set result files and write the overall percentage to $dir/wer.
# NOTE(review): if no line matches, d stays 0 and the division in END fails.
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer

Просмотреть файл

@ -9,7 +9,7 @@ mkdir -p $dir/{log,nnet}
###### SELECT FEATURES ######
cat data/train.scp | scripts/shuffle_list.pl > $dir/train.scp
cat data/train.scp | scripts/shuffle_list.pl ${seed:-666} > $dir/train.scp
head -n 3591 $dir/train.scp > $dir/train.scp.tr
tail -n 399 $dir/train.scp > $dir/train.scp.cv
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
@ -50,7 +50,7 @@ feats_cv="$feats_cv apply-cmvn --print-args=false --norm-vars=true $cvn ark:- ar
###### INITIALIZE THE NNET ######
mlp_init=$dir/nnet.init
num_tgt=$(grep NUMPDFS $dir_ali/final.mdl | awk '{ print $4 }')
scripts/gen_mlp_init.py --dim=39:1024:${num_tgt} --gauss --negbias > $mlp_init
scripts/gen_mlp_init.py --dim=39:1024:${num_tgt} --gauss --negbias --seed=666 > $mlp_init

Просмотреть файл

@ -9,7 +9,7 @@ mkdir -p $dir/{log,nnet}
###### SELECT FEATURES ######
cat data/train.scp | scripts/shuffle_list.pl > $dir/train.scp
cat data/train.scp | scripts/shuffle_list.pl ${seed:-666} > $dir/train.scp
head -n 3591 $dir/train.scp > $dir/train.scp.tr
tail -n 399 $dir/train.scp > $dir/train.scp.cv
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
@ -54,7 +54,7 @@ feats_cv="$feats_cv splice-feats --print-args=false --left-context=5 --right-con
###### INITIALIZE THE NNET ######
mlp_init=$dir/nnet.init
num_tgt=$(grep NUMPDFS $dir_ali/final.mdl | awk '{ print $4 }')
scripts/gen_mlp_init.py --dim=429:568:${num_tgt} --gauss --negbias > $mlp_init
scripts/gen_mlp_init.py --dim=429:568:${num_tgt} --gauss --negbias --seed=666 > $mlp_init

Просмотреть файл

@ -9,7 +9,7 @@ mkdir -p $dir/{log,nnet}
###### SELECT FEATURES ######
cat data/train.scp | scripts/shuffle_list.pl > $dir/train.scp
cat data/train.scp | scripts/shuffle_list.pl ${seed:-666} > $dir/train.scp
head -n 3591 $dir/train.scp > $dir/train.scp.tr
tail -n 399 $dir/train.scp > $dir/train.scp.cv
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
@ -46,7 +46,7 @@ feats_cv="$feats_cv splice-feats --print-args=false --left-context=5 --right-con
###### INITIALIZE THE NNET ######
mlp_init=$dir/nnet.init
num_tgt=$(grep NUMPDFS $dir_ali/final.mdl | awk '{ print $4 }')
scripts/gen_mlp_init.py --dim=429:577:${num_tgt} --gauss --negbias > $mlp_init
scripts/gen_mlp_init.py --dim=429:577:${num_tgt} --gauss --negbias --seed=666 > $mlp_init