git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@232 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Karel Vesely 2011-08-09 09:51:33 +00:00
Parent 14193e6af2
Commit aea2f4f72e
4 changed files with 0 additions and 335 deletions

View file

@@ -1,59 +0,0 @@
#!/bin/bash
# Copyright 2011 Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Pure hybrid monophone decoding script.
if [ -f path.sh ]; then . path.sh; fi
dir=exp/decode_nnet_pdf
tree=exp/mono/tree
mkdir -p $dir
model=exp/mono/final.mdl
graphdir=exp/graph_mono
nnet=exp/nnet_pdf/nnet_final
scripts/mkgraph.sh --mono $tree $model $graphdir
echo "DECODING..."
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
(
#get features
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
#compute CMVN
cmvn=ark:$dir/test_${test}_cmvn.ark
compute-cmvn-stats "$feats" $cmvn
feats="$feats apply-cmvn --print-args=false --norm-vars=true $cmvn ark:- ark:- |"
#append the MLP forward pass (log-posterior outputs) to the feature pipeline
feats="$feats nnet-forward --print-args=false --apply-log=true $nnet ark:- ark:- |"
echo $feats
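# decode with pdf-level scores: decode-faster-mapped uses the transition model
# to map the graph's transition-ids onto the nnet's pdf outputs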
decode-faster-mapped --beam=20.0 --acoustic-scale=0.22 --word-symbol-table=data/words.txt \
 $model $graphdir/HCLG.fst "$feats" \
 ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali \
 2> $dir/decode_${test}.log
# the ,p option lets compute-wer score partial output without dying.
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
) &
done
wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
> $dir/wer
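The awk above averages WER by pooling counts across the test sets rather than averaging the per-set percentages: it sums the error counts and the reference-word counts from the compute-wer summaries. A minimal standalone sketch of that step, assuming the grepped summary lines look like "%WER e [ n / d, ... ]" (the exact field layout is an assumption here):

printf '%s\n' \
 'wer_a:%WER 10.00 [ 100 / 1000, 10 ins, 20 del, 70 sub ]' \
 'wer_b:%WER 20.00 [ 400 / 2000, 40 ins, 80 del, 280 sub ]' | \
 awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }'
# prints: Average WER is 16.666667 (500 / 3000)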

View file

@@ -1,57 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation; Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from .. (i.e., from the directory one level above this script)
if [ -f path.sh ]; then . path.sh; fi
dir=exp/decode_nnet_trans
tree=exp/mono/tree
mkdir -p $dir
model=exp/mono/final.mdl
graphdir=exp/graph_mono
nnet=exp/nnet_trans/nnet_final
scripts/mkgraph.sh --mono $tree $model $graphdir
echo "DECODING..."
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
(
#get features
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
#compute CMVN
cmvn=ark:$dir/test_${test}_cmvn.ark
compute-cmvn-stats "$feats" $cmvn
feats="$feats apply-cmvn --print-args=false --norm-vars=true $cmvn ark:- ark:- |"
#append the MLP forward pass (log-posterior outputs) to the feature pipeline
feats="$feats nnet-forward --print-args=false --apply-log=true $nnet ark:- ark:- |"
echo $feats
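# decode directly on the nnet outputs: decode-faster takes no transition model,
# so the log-posteriors are used as per-frame scores for the graph's transition-ids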
decode-faster --beam=20.0 --acoustic-scale=0.22 --word-symbol-table=data/words.txt \
 $graphdir/HCLG.fst "$feats" \
 ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali \
 2> $dir/decode_${test}.log
# the ,p option lets compute-wer score partial output without dying.
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
) &
done
wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
> $dir/wer
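For reference, the "$feats" string built up above is a single piped rspecifier; for the mar87 set it expands to roughly the following (one long string, wrapped here for readability). Kaldi runs everything after the leading ark: as a shell pipeline and reads the archive from its stdout, so apart from the CMVN stats no intermediate features are written to disk.

ark:add-deltas --print-args=false scp:data/test_mar87.scp ark:- |
 apply-cmvn --print-args=false --norm-vars=true ark:exp/decode_nnet_trans/test_mar87_cmvn.ark ark:- ark:- |
 nnet-forward --print-args=false --apply-log=true exp/nnet_trans/nnet_final ark:- ark:- |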

View file

@@ -1,110 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation; Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from .. (i.e., from the directory one level above this script)
if [ -f path.sh ]; then . path.sh; fi
dir=$PWD/exp/nnet_pdf
mkdir -p $dir/{log,nnet}
#reuse the feature list (train.scp) and alignments from the monophone system
cp exp/mono/train.scp exp/mono/cur.ali $dir
head -n 800 $dir/train.scp > $dir/train.scp.tr
tail -n 200 $dir/train.scp > $dir/train.scp.cv
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
feats_tr="ark:add-deltas --print-args=false scp:$dir/train.scp.tr ark:- |"
feats_cv="ark:add-deltas --print-args=false scp:$dir/train.scp.cv ark:- |"
#convert ali to pdf
ali-to-pdf exp/mono/final.mdl ark:$dir/cur.ali ark,t:$dir/cur.pdf
labels="ark:$dir/cur.pdf"
#compute per-utterance CMVN stats
cmvn="ark:$dir/cmvn.ark"
compute-cmvn-stats "$feats" $cmvn
feats_tr="$feats_tr apply-cmvn --print-args=false --norm-vars=true $cmvn ark:- ark:- |"
feats_cv="$feats_cv apply-cmvn --print-args=false --norm-vars=true $cmvn ark:- ark:- |"
#initialize the nnet
mlp_init=$dir/nnet.init
scripts/gen_mlp_init.py --dim=39:512:146 --gauss --negbias > $mlp_init
#global config for training
max_iters=20
start_halving_inc=0.5
end_halving_inc=0.1
lrate=0.001
nnet-train-xent-hardlab-perutt --cross-validate=true $mlp_init "$feats_cv" "$labels" &> $dir/log/prerun.log
if [ $? != 0 ]; then cat $dir/log/prerun.log; exit 1; fi
acc=$(grep Xent $dir/log/prerun.log | tail -n 1 | cut -d'[' -f 2 | cut -d'%' -f 1)
echo CROSSVAL PRERUN ACCURACY $acc
mlp_best=$mlp_init
mlp_base=${mlp_init##*/}; mlp_base=${mlp_base%.*}
halving=0
for iter in $(seq -w $max_iters); do
mlp_next=$dir/nnet/${mlp_base}_iter${iter}
nnet-train-xent-hardlab-perutt --learn-rate=$lrate $mlp_best "$feats_tr" "$labels" $mlp_next &> $dir/log/iter$iter.log
if [ $? != 0 ]; then cat $dir/log/iter$iter.log; exit 1; fi
tr_acc=$(grep Xent $dir/log/iter$iter.log | tail -n 1 | cut -d'[' -f 2 | cut -d'%' -f 1)
echo TRAIN ITERATION $iter ACCURACY $tr_acc LRATE $lrate
nnet-train-xent-hardlab-perutt --cross-validate=true $mlp_next "$feats_cv" "$labels" >> $dir/log/iter$iter.log 2>&1
if [ $? != 0 ]; then cat $dir/log/iter$iter.log; exit 1; fi
#accept or reject new parameters
acc_new=$(grep Xent $dir/log/iter$iter.log | tail -n 1 | cut -d'[' -f 2 | cut -d'%' -f 1)
echo CROSSVAL ITERATION $iter ACCURACY $acc_new
acc_prev=$acc
if [ 1 == $(awk 'BEGIN{print('$acc_new' > '$acc')}') ]; then
acc=$acc_new
mlp_best=$dir/nnet/$mlp_base.iter${iter}_tr$(printf "%.5g" $tr_acc)_cv$(printf "%.5g" $acc_new)
mv $mlp_next $mlp_best
echo nnet $mlp_best accepted
else
mlp_reject=$dir/nnet/$mlp_base.iter${iter}_tr$(printf "%.5g" $tr_acc)_cv$(printf "%.5g" $acc_new)
mv $mlp_next $mlp_reject
echo nnet $mlp_reject rejected
fi
#stopping criterion
if [[ 1 == $halving && 1 == $(awk 'BEGIN{print('$acc' < '$acc_prev'+'$end_halving_inc')}') ]]; then
echo finished, too small improvement $(awk 'BEGIN{print('$acc'-'$acc_prev')}')
break
fi
#start annealing when improvement is low
if [ 1 == $(awk 'BEGIN{print('$acc' < '$acc_prev'+'$start_halving_inc')}') ]; then
halving=1
fi
#do annealing
if [ 1 == $halving ]; then
lrate=$(awk 'BEGIN{print('$lrate'*0.5)}')
fi
done
if [ $mlp_best != $mlp_init ]; then
iter=$(echo $mlp_best | sed 's/^.*iter\([0-9][0-9]*\).*$/\1/')
fi
mlp_final=$dir/${mlp_base}_final_iter${iter:-0}_acc${acc}
cp $mlp_best $mlp_final
ln -s $mlp_final $dir/${mlp_base}_final
echo final network $mlp_final
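The --dim=39:512:146 passed to gen_mlp_init.py above fixes the topology: 39 inputs matching the dimension of the add-deltas features (13 base coefficients plus deltas and delta-deltas in this setup) and 146 outputs that must equal the pdf count of exp/mono/final.mdl, since the targets come from ali-to-pdf. A quick sanity check of both numbers (a sketch only; it assumes the standard Kaldi tools feat-to-dim and am-info are built and on the PATH, which this commit does not show):

# input dimension of the delta features (should print 39)
feat-to-dim "ark:add-deltas --print-args=false scp:exp/nnet_pdf/train.scp ark:- |" -
# pdf count of the monophone model (should match the 146 used above)
am-info exp/mono/final.mdl | grep pdfs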

View file

@@ -1,109 +0,0 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation; Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from .. (i.e., from the directory one level above this script)
if [ -f path.sh ]; then . path.sh; fi
dir=$PWD/exp/nnet_trans
mkdir -p $dir/{log,nnet}
#reuse the feature list (train.scp) and alignments from the monophone system
cp exp/mono/train.scp exp/mono/cur.ali $dir
head -n 800 $dir/train.scp > $dir/train.scp.tr
tail -n 200 $dir/train.scp > $dir/train.scp.cv
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
feats_tr="ark:add-deltas --print-args=false scp:$dir/train.scp.tr ark:- |"
feats_cv="ark:add-deltas --print-args=false scp:$dir/train.scp.cv ark:- |"
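#use the raw alignments (transition-ids) as targets, unlike the pdf targets of the nnet_pdf setup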
labels="ark:$dir/cur.ali"
#compute per-utterance CMVN stats
cmvn="ark:$dir/cmvn.ark"
compute-cmvn-stats "$feats" $cmvn
feats_tr="$feats_tr apply-cmvn --print-args=false --norm-vars=true $cmvn ark:- ark:- |"
feats_cv="$feats_cv apply-cmvn --print-args=false --norm-vars=true $cmvn ark:- ark:- |"
#initialize the nnet
mlp_init=$dir/nnet.init
scripts/gen_mlp_init.py --dim=39:512:301 --gauss --negbias > $mlp_init
#global config for training
max_iters=20
start_halving_inc=0.5
end_halving_inc=0.1
lrate=0.001
nnet-train-xent-hardlab-perutt --cross-validate=true $mlp_init "$feats_cv" "$labels" &> $dir/log/prerun.log
if [ $? != 0 ]; then cat $dir/log/prerun.log; exit 1; fi
acc=$(grep Xent $dir/log/prerun.log | tail -n 1 | cut -d'[' -f 2 | cut -d'%' -f 1)
echo CROSSVAL PRERUN ACCURACY $acc
mlp_best=$mlp_init
mlp_base=${mlp_init##*/}; mlp_base=${mlp_base%.*}
halving=0
for iter in $(seq -w $max_iters); do
mlp_next=$dir/nnet/${mlp_base}_iter${iter}
nnet-train-xent-hardlab-perutt --learn-rate=$lrate $mlp_best "$feats_tr" "$labels" $mlp_next &> $dir/log/iter$iter.log
if [ $? != 0 ]; then cat $dir/log/iter$iter.log; exit 1; fi
tr_acc=$(grep Xent $dir/log/iter$iter.log | tail -n 1 | cut -d'[' -f 2 | cut -d'%' -f 1)
echo TRAIN ITERATION $iter ACCURACY $tr_acc LRATE $lrate
nnet-train-xent-hardlab-perutt --cross-validate=true $mlp_next "$feats_cv" "$labels" >> $dir/log/iter$iter.log 2>&1
if [ $? != 0 ]; then cat $dir/log/iter$iter.log; exit 1; fi
#accept or reject new parameters
acc_new=$(grep Xent $dir/log/iter$iter.log | tail -n 1 | cut -d'[' -f 2 | cut -d'%' -f 1)
echo CROSSVAL ITERATION $iter ACCURACY $acc_new
acc_prev=$acc
if [ 1 == $(awk 'BEGIN{print('$acc_new' > '$acc')}') ]; then
acc=$acc_new
mlp_best=$dir/nnet/$mlp_base.iter${iter}_tr$(printf "%.5g" $tr_acc)_cv$(printf "%.5g" $acc_new)
mv $mlp_next $mlp_best
echo nnet $mlp_best accepted
else
mlp_reject=$dir/nnet/$mlp_base.iter${iter}_tr$(printf "%.5g" $tr_acc)_cv$(printf "%.5g" $acc_new)
mv $mlp_next $mlp_reject
echo nnet $mlp_reject rejected
fi
#stopping criterion
if [[ 1 == $halving && 1 == $(awk 'BEGIN{print('$acc' < '$acc_prev'+'$end_halving_inc')}') ]]; then
echo finished, too small improvement $(awk 'BEGIN{print('$acc'-'$acc_prev')}')
break
fi
#start annealing when improvement is low
if [ 1 == $(awk 'BEGIN{print('$acc' < '$acc_prev'+'$start_halving_inc')}') ]; then
halving=1
fi
#do annealing
if [ 1 == $halving ]; then
lrate=$(awk 'BEGIN{print('$lrate'*0.5)}')
fi
done
if [ $mlp_best != $mlp_init ]; then
iter=$(echo $mlp_best | sed 's/^.*iter\([0-9][0-9]*\).*$/\1/')
fi
mlp_final=$dir/${mlp_base}_final_iter${iter:-0}_acc${acc}
cp $mlp_best $mlp_final
ln -s $mlp_final $dir/${mlp_base}_final
echo final network $mlp_final
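Both training scripts follow the same newbob-style schedule: the learning rate is kept at lrate while the cross-validation accuracy still improves by more than start_halving_inc, it is halved on every iteration afterwards, and training stops once the improvement drops below end_halving_inc. A toy replay of just that control flow on made-up accuracy values (purely illustrative, no Kaldi tools involved):

lrate=0.001; halving=0; acc=60.0
start_halving_inc=0.5; end_halving_inc=0.1
for acc_new in 62.0 63.0 63.3 63.35; do   # fake cross-validation accuracies
 acc_prev=$acc
 if [ 1 == $(awk 'BEGIN{print('$acc_new' > '$acc')}') ]; then acc=$acc_new; fi
 if [[ 1 == $halving && 1 == $(awk 'BEGIN{print('$acc' < '$acc_prev'+'$end_halving_inc')}') ]]; then
  echo stopping, improvement below $end_halving_inc; break
 fi
 if [ 1 == $(awk 'BEGIN{print('$acc' < '$acc_prev'+'$start_halving_inc')}') ]; then halving=1; fi
 if [ 1 == $halving ]; then lrate=$(awk 'BEGIN{print('$lrate'*0.5)}'); fi
 echo acc=$acc lrate=$lrate halving=$halving
done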