Merge pull request #775 from vesis84/nnet1_smbr_scripts

nnet1: updating the MMI / sMBR scripts,
This commit is contained in:
Karel Vesely 2016-05-12 16:21:57 +02:00
Родитель 3ce167e11f 062ac0e028
Коммит 3d16b2366c
2 изменённых файлов: 23 добавлений и 11 удалений

Просмотреть файл

@ -57,7 +57,9 @@ alidir=$4
denlatdir=$5
dir=$6
for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.scp $srcdir/{final.nnet,final.feature_transform}; do
for f in $data/feats.scp $denlatdir/lat.scp \
$alidir/{tree,final.mdl,ali.1.gz} \
$srcdir/{final.nnet,final.feature_transform}; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
@ -68,7 +70,7 @@ mkdir -p $dir/log
cp $alidir/{final.mdl,tree} $dir
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
silphonelist=`cat $lang/phones/silence.csl`
#Get the files we will need
@ -94,7 +96,7 @@ model=$dir/final.mdl
# Shuffle the feature list to make the GD stochastic!
# By shuffling features, we have to use lattices with random access (indexed by .scp file).
cat $data/feats.scp | utils/shuffle_list.pl --srand $seed > $dir/train.scp
cat $data/feats.scp | utils/shuffle_list.pl --srand $seed >$dir/train.scp
###
### PREPARE FEATURE EXTRACTION PIPELINE
@ -187,7 +189,7 @@ while [ $x -le $num_iters ]; do
--learn-rate=$learn_rate \
--drop-frames=$drop_frames \
--verbose=$verbose \
$cur_mdl $alidir/final.mdl "$feats" "$lats" "$ali" $dir/$x.nnet || exit 1
$cur_mdl $alidir/final.mdl "$feats" "$lats" "$ali" $dir/$x.nnet
fi
cur_mdl=$dir/$x.nnet
@ -209,8 +211,10 @@ else
echo "Re-estimating priors by forwarding 10k utterances from training set."
. cmd.sh
nj=$(cat $alidir/num_jobs)
steps/nnet/make_priors.sh --cmd "$train_cmd" ${ivector:+--ivector "$ivector"} --nj $nj \
$data $dir || exit 1
steps/nnet/make_priors.sh --cmd "$train_cmd" --nj $nj \
${ivector:+ --ivector "$ivector"} \
$data $dir
fi
echo "$0: Done. '$dir'"
exit 0

Просмотреть файл

@ -23,6 +23,7 @@ do_smbr=true
exclude_silphones=true # exclude silphones from approximate accuracy computation
unkphonelist= # exclude unkphones from approximate accuracy computation (overrides exclude_silphones)
one_silence_class=true # true : reduce insertions in sMBR/MPE FW/BW, more stable training,
# (all silphones are seen as a single class in the sMBR/MPE FW/BW)
verbose=1
ivector=
@ -59,7 +60,9 @@ alidir=$4
denlatdir=$5
dir=$6
for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.scp $srcdir/{final.nnet,final.feature_transform}; do
for f in $data/feats.scp $denlatdir/lat.scp \
$alidir/{tree,final.mdl,ali.1.gz} \
$srcdir/{final.nnet,final.feature_transform}; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
@ -70,7 +73,7 @@ mkdir -p $dir/log
cp $alidir/{final.mdl,tree} $dir
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
silphonelist=`cat $lang/phones/silence.csl`
#Get the files we will need
nnet=$srcdir/$(readlink $srcdir/final.nnet || echo final.nnet);
@ -91,7 +94,9 @@ cp $feature_transform $dir/final.feature_transform
model=$dir/final.mdl
[ -z "$model" ] && echo "Error transition model '$model' does not exist!" && exit 1;
#enable/disable silphones from MPE training
# The argument '--silence-phones=csl' together with '--one-silence-class=true'
# will cause regrouping of the silenece phones into a single class in the FW/BW
# which calculates the Loss derivative (the 'new' behavior).
mpe_silphones_arg= #empty
$exclude_silphones && mpe_silphones_arg="--silence-phones=$silphonelist" # all silphones
[ ! -z $unkphonelist ] && mpe_silphones_arg="--silence-phones=$unkphonelist" # unk only
@ -175,7 +180,7 @@ while [ $x -le $num_iters ]; do
--verbose=$verbose \
--one-silence-class=$one_silence_class \
$mpe_silphones_arg \
$cur_mdl $alidir/final.mdl "$feats" "$lats" "$ali" $dir/$x.nnet || exit 1
$cur_mdl $alidir/final.mdl "$feats" "$lats" "$ali" $dir/$x.nnet
fi
cur_mdl=$dir/$x.nnet
@ -198,7 +203,10 @@ else
echo "Re-estimating priors by forwarding 10k utterances from training set."
. cmd.sh
nj=$(cat $alidir/num_jobs)
steps/nnet/make_priors.sh --cmd "$train_cmd" --nj $nj $data $dir || exit 1
steps/nnet/make_priors.sh --cmd "$train_cmd" --nj $nj \
${ivector:+ --ivector "$ivector"} \
$data $dir
fi
echo "$0: Done. '$dir'"
exit 0