зеркало из https://github.com/mozilla/kaldi.git
Fixes to recipes; updating RESULTS files [nearly finished with WSJ one].
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@113 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
7b0f9e2169
Коммит
75883cff68
|
@ -10,20 +10,68 @@ feb89 oct89 feb91 sep92 avg
|
|||
2.77 4.02 3.30 6.29 4.10 % from my ICASSP'99 paper on Frame Discrimination (ML baseline)
|
||||
3.20 4.10 2.86 6.06 4.06 % from decode_tri2c (which is triphone + CMN)
|
||||
|
||||
exp/decode_mono/wer:Average WER is 14.234421 (1784 / 12533)
|
||||
exp/decode_tri1/wer:Average WER is 4.420330 (554 / 12533) # First triphone pass
|
||||
exp/decode_tri1_fmllr/wer:Average WER is 4.707572 (590 / 12533) # + fMLLR
|
||||
exp/decode_tri1_regtree_fmllr/wer:Average WER is 4.707572 (590 / 12533) # + regression-tree
|
||||
exp/decode_tri2a/wer:Average WER is 4.476183 (561 / 12533) # Second triphone pass
|
||||
exp/decode_tri2a_fmllr/wer:Average WER is 3.718184 (466 / 12533) # + fMLLR
|
||||
exp/decode_tri2a_fmllr_utt/wer:Average WER is 4.452246 (558 / 12533) # [ fMLLR per utterance ]
|
||||
exp/decode_tri2b/wer:Average WER is 2.992101 (375 / 12533) # Exponential transform
|
||||
exp/decode_tri2b_utt/wer:Average WER is 3.247427 (407 / 12533) # [adapt per-utt]
|
||||
exp/decode_tri2c/wer:Average WER is 3.789994 (475 / 12533) # Cepstral mean subtraction (per-spk)
|
||||
exp/decode_tri2d/wer:Average WER is 4.188941 (525 / 12533) # MLLT (= global STC)
|
||||
exp/decode_tri2e/wer:Average WER is 4.923003 (617 / 12533) # splice-9-frames + LDA features
|
||||
exp/decode_tri2f/wer:Average WER is 3.782015 (474 / 12533) # splice-9-frames + LDA + MLLT
|
||||
exp/decode_tri2g/wer:Average WER is 3.670310 (460 / 12533) # Linear VTLN (LVTLN); includes mean-only fMLLR
|
||||
exp/decode_mono/wer:Average WER is 14.234421 (1784 / 12533) # Monophone system, subset
|
||||
|
||||
exp/decode_tri1/wer:Average WER is 4.420330 (554 / 12533) # First triphone pass
|
||||
exp/decode_tri1_fmllr/wer:Average WER is 3.837868 (481 / 12533) # + fMLLR
|
||||
exp/decode_tri1_regtree_fmllr/wer:Average WER is 3.789994 (475 / 12533) # + regression-tree
|
||||
|
||||
|
||||
exp/decode_tri2a/wer:Average WER is 3.973510 (498 / 12533) # Second triphone pass
|
||||
exp/decode_tri2a_fmllr/wer:Average WER is 3.590521 (450 / 12533) # + fMLLR
|
||||
exp/decode_tri2a_fmllr_utt/wer:Average WER is 3.933615 (493 / 12533) # [ fMLLR per utterance ]
|
||||
exp/decode_tri2b/wer:Average WER is 3.303279 (414 / 12533) # Exponential transform
|
||||
exp/decode_tri2b_utt/wer:Average WER is 3.335195 (418 / 12533) # [adapt per-utt]
|
||||
exp/decode_tri2c/wer:Average WER is 3.957552 (496 / 12533) # Cepstral mean subtraction (per-spk)
|
||||
exp/decode_tri2d/wer:Average WER is 4.316604 (541 / 12533) # MLLT (= global STC)
|
||||
exp/decode_tri2e/wer:Average WER is 4.659698 (584 / 12533) # splice-9-frames + LDA features
|
||||
exp/decode_tri2f/wer:Average WER is 3.885742 (487 / 12533) # splice-9-frames + LDA + MLLT
|
||||
exp/decode_tri2g/wer:Average WER is 3.303279 (414 / 12533) # Linear VTLN
|
||||
exp/decode_tri2g_diag/wer:Average WER is 3.135722 (393 / 12533) # Linear VTLN; diagonal adapt in test
|
||||
exp/decode_tri2g_vtln/wer:Average WER is 3.239448 (406 / 12533) # Use warp factors -> feature-level VTLN + offset estimation
|
||||
exp/decode_tri2g_vtln_diag/wer:Average WER is 3.127743 (392 / 12533) # feature-level VTLN + diag fMLLR
|
||||
exp/decode_tri2g_vtln_diag_utt/wer:Average WER is 3.407006 (427 / 12533) # as above, per utt.
|
||||
exp/decode_tri2g_vtln_nofmllr/wer:Average WER is 3.694247 (463 / 12533) # feature-level VTLN but no fMLLR
|
||||
|
||||
exp/decode_tri2h/wer:Average WER is 4.252773 (533 / 12533) # Splice-9-frames + HLDA
|
||||
exp/decode_tri2i/wer:Average WER is 3.981489 (499 / 12533) # Triple-deltas + HLDA
|
||||
exp/decode_tri2j/wer:Average WER is 3.853826 (483 / 12533) # Triple-deltas + LDA + MLLT
|
||||
exp/decode_tri2k/wer:Average WER is 2.968164 (372 / 12533) # LDA + exponential transform
|
||||
exp/decode_tri2k_utt/wer:Average WER is 3.175616 (398 / 12533) # per-utterance adaptation.
|
||||
exp/decode_tri2k_fmllr/wer:Average WER is 2.505386 (314 / 12533) # +fMLLR (per-spk)
|
||||
exp/decode_tri2k_regtree_fmllr/wer:Average WER is 2.513365 (315 / 12533) # +regression tree
|
||||
|
||||
exp/decode_tri2l/wer:Average WER is 2.704859 (339 / 12533) # Splice-9-frames + LDA + MLLT + SAT (fMLLR in test)
|
||||
exp/decode_tri2l_utt/wer:Average WER is 4.930982 (618 / 12533) # [ as decode_tri2l but per-utt in test. ]
|
||||
|
||||
# sgmma is SGMM without speaker vectors.
|
||||
exp/decode_sgmma/wer:Average WER is 3.319237 (416 / 12533)
|
||||
exp/decode_sgmma_fmllr/wer:Average WER is 2.934308 (289 / 9849)
|
||||
exp/decode_sgmma_fmllr_utt/wer:Average WER is 3.303279 (414 / 12533)
|
||||
exp/decode_sgmma_fmllrbasis_utt/wer:Average WER is 3.191574 (400 / 12533)
|
||||
|
||||
# sgmmb is SGMM with speaker vectors.
|
||||
exp/decode_sgmmb/wer:Average WER is 2.760712 (346 / 12533)
|
||||
exp/decode_sgmmb_utt/wer:Average WER is 2.808585 (352 / 12533)
|
||||
exp/decode_sgmmb_fmllr/wer:Average WER is 2.553259 (320 / 12533)
|
||||
|
||||
|
||||
# sgmmc is like sgmmb but with gender dependency [doesn't help here]
|
||||
exp/decode_sgmmc/wer:Average WER is 2.776670 (348 / 12533)
|
||||
exp/decode_sgmmc_fmllr/wer:Average WER is 2.601133 (326 / 12533)
|
||||
|
||||
|
||||
|
||||
exp/decode_tri2a/wer:Average WER is 4.476183 (561 / 12533)
|
||||
exp/decode_tri2a_fmllr/wer:Average WER is 3.718184 (466 / 12533)
|
||||
exp/decode_tri2a_fmllr_utt/wer:Average WER is 4.452246 (558 / 12533)
|
||||
exp/decode_tri2b/wer:Average WER is 2.992101 (375 / 12533)
|
||||
exp/decode_tri2b_utt/wer:Average WER is 3.247427 (407 / 12533)
|
||||
exp/decode_tri2c/wer:Average WER is 3.789994 (475 / 12533)
|
||||
exp/decode_tri2d/wer:Average WER is 4.188941 (525 / 12533)
|
||||
exp/decode_tri2e/wer:Average WER is 4.923003 (617 / 12533)
|
||||
exp/decode_tri2f/wer:Average WER is 3.782015 (474 / 12533)
|
||||
exp/decode_tri2g/wer:Average WER is 3.670310 (460 / 12533)
|
||||
exp/decode_tri2g_diag/wer:Average WER is 3.550626 (445 / 12533) # +change mean-only to diagonal fMLLR
|
||||
exp/decode_tri2g_vtln/wer:Average WER is 3.534668 (443 / 12533) # More conventional VTLN (+mean-only fMLLR)
|
||||
exp/decode_tri2g_vtln_diag/wer:Average WER is 3.438921 (431 / 12533) #+change mean-only to diagonal fMLLR
|
||||
|
@ -39,6 +87,12 @@ exp/decode_tri2k_regtree_fmllr/wer:Average WER is 2.561238 (321 / 12533) # +reg
|
|||
exp/decode_tri2l/wer:Average WER is 2.688901 (337 / 12533) # Splice-9-frames + LDA + MLLT + SAT (fMLLR in test)
|
||||
exp/decode_tri2l_utt/wer:Average WER is 5.066624 (635 / 12533) # [ as decode_tri2l but per-utt in test. ]
|
||||
|
||||
exp/decode_tri2m/wer:Average WER is 3.223490 (404 / 12533) # Splice + LDA + MLLT + Linear VTLN
|
||||
exp/decode_tri2m_diag/wer:Average WER is 3.119764 (391 / 12533) # diagonal not offset CMLLR component
|
||||
exp/decode_tri2m_vtln/wer:Average WER is 4.747467 (595 / 12533) # feature-level VTLN computation
|
||||
exp/decode_tri2m_vtln_diag/wer:Average WER is 3.087848 (387 / 12533) # diagonal, not offset, adapt
|
||||
exp/decode_tri2m_vtln_diag_utt/wer:Average WER is 4.340541 (544 / 12533) # per-utterance, diag adapt.
|
||||
|
||||
|
||||
# sgmma is SGMM without speaker vectors.
|
||||
exp/decode_sgmma/wer:Average WER is 3.151680 (395 / 12533)
|
||||
|
|
|
@ -1,2 +1,4 @@
|
|||
root=../../..  # NOTE(review): presumably the top of the source checkout, relative to the recipe directory — confirm
|
||||
export PATH=${root}/src/bin:${root}/tools/openfst/bin:${root}/src/fstbin/:${root}/src/gmmbin/:${root}/src/featbin/:${root}/src/fgmmbin:${root}/src/sgmmbin:$PATH  # put the binary directories under ${root} ahead of the existing PATH
|
||||
export LC_ALL=C  # run every tool sourced through this file in the C locale
|
||||
export LC_LOCALE_ALL=C  # NOTE(review): LC_LOCALE_ALL is not a standard locale variable; LC_ALL above already selects the C locale — confirm this is needed
|
||||
|
|
|
@ -65,7 +65,6 @@ steps/train_tri2a.sh
|
|||
(steps/decode_tri2a.sh ; steps/decode_tri2a_fmllr.sh; steps/decode_tri2a_fmllr_utt.sh )&
|
||||
|
||||
|
||||
|
||||
# Then do the same for 2b, 2c, and so on
|
||||
# 2a = basic triphone (all features double-deltas unless stated).
|
||||
# 2b = exponential transform
|
||||
|
@ -79,8 +78,9 @@ steps/train_tri2a.sh
|
|||
# 2j = triple-deltas + LDA + MLLT
|
||||
# 2k = LDA + ET (equiv to LDA+MLLT+ET)
|
||||
# 2l = splice-9-frames + LDA + MLLT + SAT (i.e. train with CMLLR)
|
||||
# 2m = splice-9-frames + LDA + MLLT + LVTLN [depends on 2f]
|
||||
|
||||
for group in "b c d e" "f g h i" "j k l"; do
|
||||
for group in "b c d e" "f g h i" "j k l m"; do
|
||||
for x in $group; do
|
||||
steps/train_tri2$x.sh &
|
||||
done
|
||||
|
@ -115,7 +115,7 @@ steps/train_ubma.sh
|
|||
(steps/train_sgmmb.sh; steps/decode_sgmmb.sh; steps/decode_sgmmb_fmllr.sh; steps/decode_sgmmb_utt.sh )&
|
||||
|
||||
# + gender dependency.
|
||||
(steps/train_sgmmc.sh; steps/decode_sgmmc.sh; steps/decode_sgmmc_fmllr.sh )&
|
||||
(steps/train_ubmb.sh; steps/train_sgmmc.sh; steps/decode_sgmmc.sh; steps/decode_sgmmc_fmllr.sh )&
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -49,12 +49,12 @@ for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
|||
( ali-to-post ark:$dir/test_${test}.pre_ali ark:- | \
|
||||
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
|
||||
sgmm-post-to-gpost "$gselect_opt" $alimodel "$feats" ark,s,cs:- ark:- | \
|
||||
sgmm-est-spkvecs-gpost "$spk2utt_opt" $model "$feats" ark,s,cs:- \
|
||||
sgmm-est-spkvecs-gpost $spk2utt_opt $model "$feats" ark,s,cs:- \
|
||||
ark:$dir/test_${test}.vecs1 ) 2>$dir/vecs1_${test}.log
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}.pre_ali ark:- | \
|
||||
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
|
||||
sgmm-est-spkvecs --spk-vecs=ark:$dir/test_${test}.vecs1 "$spk2utt_opt" \
|
||||
sgmm-est-spkvecs --spk-vecs=ark:$dir/test_${test}.vecs1 $spk2utt_opt \
|
||||
$model "$feats" ark,s,cs:- ark:$dir/test_${test}.vecs2 ) 2>$dir/vecs2_${test}.log
|
||||
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
|||
|
||||
( ali-to-post ark:$dir/test_${test}.pre_ali ark:- | \
|
||||
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
|
||||
sgmm-est-spkvecs --spk-vecs=ark:$dir/test_${test}.vecs1 "$spk2utt_opt" \
|
||||
sgmm-est-spkvecs --spk-vecs=ark:$dir/test_${test}.vecs1 $spk2utt_opt \
|
||||
$model "$feats" ark,s,cs:- ark:$dir/test_${test}.vecs2 ) 2>$dir/vecs2_${test}.log
|
||||
|
||||
sgmm-decode-faster "$gselect_opt" $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs2 --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
|
|
@ -55,12 +55,12 @@ for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
|||
( ali-to-post ark:$dir/test_${test}.pre_ali ark:- | \
|
||||
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
|
||||
sgmm-post-to-gpost "$gselect_opt" $alimodel "$feats" ark,s,cs:- ark:- | \
|
||||
sgmm-est-spkvecs-gpost "$spk2utt_opt" $model "$feats" ark,s,cs:- \
|
||||
sgmm-est-spkvecs-gpost $spk2utt_opt $model "$feats" ark,s,cs:- \
|
||||
ark:$dir/test_${test}.vecs1 ) 2>$dir/vecs1_${test}.log
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}.pre_ali ark:- | \
|
||||
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
|
||||
sgmm-est-spkvecs --spk-vecs=ark:$dir/test_${test}.vecs1 "$spk2utt_opt" \
|
||||
sgmm-est-spkvecs --spk-vecs=ark:$dir/test_${test}.vecs1 $spk2utt_opt \
|
||||
$model "$feats" ark,s,cs:- ark:$dir/test_${test}.vecs2 ) 2>$dir/vecs2_${test}.log
|
||||
|
||||
sgmm-decode-faster "$gselect_opt" $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs2 --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
|
|
@ -73,12 +73,12 @@ for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
|||
( ali-to-post ark:$dir/test_${test}.pass1.ali ark:- | \
|
||||
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
|
||||
sgmm-post-to-gpost "$gselect_opt" $alimodel "$feats" ark,s,cs:- ark:- | \
|
||||
sgmm-est-spkvecs-gpost "$spk2utt_opt" $model "$feats" ark,s,cs:- \
|
||||
sgmm-est-spkvecs-gpost $spk2utt_opt $model "$feats" ark,s,cs:- \
|
||||
ark:$dir/test_${test}.vecs1 ) 2>$dir/vecs1_${test}.log
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}.pass1.ali ark:- | \
|
||||
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
|
||||
sgmm-est-spkvecs --spk-vecs=ark:$dir/test_${test}.vecs1 "$spk2utt_opt" \
|
||||
sgmm-est-spkvecs --spk-vecs=ark:$dir/test_${test}.vecs1 $spk2utt_opt \
|
||||
$model "$feats" ark,s,cs:- ark:$dir/test_${test}.vecs2 ) 2>$dir/vecs2_${test}.log
|
||||
|
||||
# Second-pass decoding with the speaker vectors.
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# to be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
dir=exp/decode_tri2m
|
||||
mkdir -p $dir
|
||||
model=exp/tri2m/final.mdl
|
||||
alignmodel=exp/tri2m/final.alimdl
|
||||
mat=exp/tri2f/final.mat
|
||||
lvtln=exp/tri2m/final.lvtln
|
||||
tree=exp/tri2m/tree
|
||||
graphdir=exp/graph_tri2m
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
# already made the graph.
|
||||
scripts/mkgraph.sh $tree $model $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
sifeats="ark:splice-feats scp:data/test_${test}.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
# First do SI decoding with alignment model.
|
||||
# Use smaller beam for this, as less critical.
|
||||
gmm-decode-faster --beam=15.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $alignmodel $graphdir/HCLG.fst "$sifeats" ark,t:$dir/test_${test}_pre.tra ark,t:$dir/test_${test}_pre.ali 2> $dir/predecode_${test}.log
|
||||
|
||||
# Comment the two lines below to make this per-utterance.
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}_pre.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alignmodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $alignmodel "$sifeats" ark:- ark:- | \
|
||||
gmm-est-lvtln-trans --verbose=1 $spk2utt_opt $model $lvtln \
|
||||
"$sifeats" ark:- ark:$dir/lvtln_${test}.trans ark,t:$dir/lvtln_${test}.warp ) \
|
||||
2>$dir/lvtln_${test}.log || exit 1;
|
||||
|
||||
feats="ark:splice-feats scp:data/test_${test}.scp ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/lvtln_${test}.trans ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
|
@ -0,0 +1,66 @@
|
|||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# to be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
dir=exp/decode_tri2m_diag
|
||||
mkdir -p $dir
|
||||
model=exp/tri2m/final.mdl
|
||||
alignmodel=exp/tri2m/final.alimdl
|
||||
mat=exp/tri2f/final.mat
|
||||
lvtln=exp/tri2m/final.lvtln
|
||||
tree=exp/tri2m/tree
|
||||
graphdir=exp/graph_tri2m
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
# already made the graph.
|
||||
scripts/mkgraph.sh $tree $model $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
sifeats="ark:splice-feats scp:data/test_${test}.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
# First do SI decoding with alignment model.
|
||||
# Use smaller beam for this, as less critical.
|
||||
gmm-decode-faster --beam=15.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $alignmodel $graphdir/HCLG.fst "$sifeats" ark,t:$dir/test_${test}_pre.tra ark,t:$dir/test_${test}_pre.ali 2> $dir/predecode_${test}.log
|
||||
|
||||
# Comment the two lines below to make this per-utterance.
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}_pre.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alignmodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $alignmodel "$sifeats" ark:- ark:- | \
|
||||
gmm-est-lvtln-trans --norm-type=diag --verbose=1 $spk2utt_opt $model $lvtln \
|
||||
"$sifeats" ark:- ark:$dir/lvtln_${test}.trans ark,t:$dir/lvtln_${test}.warp ) \
|
||||
2>$dir/lvtln_${test}.log || exit 1;
|
||||
|
||||
feats="ark:splice-feats scp:data/test_${test}.scp ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/lvtln_${test}.trans ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
|
@ -0,0 +1,80 @@
|
|||
# as decode_tri2m but using the feature-level VTLN
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# as opposed to the linear VTLN when decoding.
|
||||
# Also computing a maximum-likelihood mean offset,
|
||||
# for better comparability with LVTLN.
|
||||
|
||||
# to be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
dir=exp/decode_tri2m_vtln
|
||||
mkdir -p $dir
|
||||
vtlnmodel=exp/tri2m/final.vtlnmdl
|
||||
lvtlnmodel=exp/tri2m/final.mdl
|
||||
alignmodel=exp/tri2m/final.alimdl
|
||||
mat=exp/tri2f/final.mat
|
||||
lvtln=exp/tri2m/final.lvtln
|
||||
tree=exp/tri2m/tree
|
||||
graphdir=exp/graph_tri2m
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
# Doesn't matter which model we use when making the graph
|
||||
# (only the transitions and structure are used).
|
||||
scripts/mkgraph.sh $tree $vtlnmodel $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
sifeats="ark:splice-feats scp:data/test_${test}.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
# First do SI decoding with alignment model.
|
||||
# Use smaller beam for this, as less critical.
|
||||
gmm-decode-faster --beam=15.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $alignmodel $graphdir/HCLG.fst "$sifeats" ark,t:$dir/test_${test}_pre.tra ark,t:$dir/test_${test}_pre.ali 2> $dir/predecode_${test}.log
|
||||
|
||||
# Comment the two lines below to make this per-utterance.
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}_pre.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alignmodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $alignmodel "$sifeats" ark:- ark:- | \
|
||||
gmm-est-lvtln-trans --verbose=1 $spk2utt_opt $lvtlnmodel $lvtln \
|
||||
"$sifeats" ark:- ark:/dev/null ark,t:$dir/lvtln_${test}.warp ) \
|
||||
2>$dir/lvtln_${test}.log || exit 1;
|
||||
|
||||
cat $dir/lvtln_${test}.warp | awk '{print $1, (0.85+0.01*$2);}' > $dir/${test}.factor
|
||||
|
||||
# splice-feats must read the VTLN-warped MFCCs from the pipe (ark:-); reading the stored scp here would discard the warp.
|
||||
feats="ark:compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/${test}.factor --config=conf/mfcc.conf scp:data_prep/test_${test}_wav.scp ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}_pre.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alignmodel ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-update-type=offset $spk2utt_opt $vtlnmodel "$feats" ark,o:- ark:$dir/${test}.trans ) 2>$dir/fmllr_${test}.log || exit 1;
|
||||
|
||||
feats="ark:compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/${test}.factor --config=conf/mfcc.conf scp:data_prep/test_${test}_wav.scp ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.trans ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $vtlnmodel $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
|
@ -0,0 +1,80 @@
|
|||
# as decode_tri2m but using the feature-level VTLN
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# as opposed to the linear VTLN when decoding.
|
||||
# Also computing a diagonal fMLLR transform for
|
||||
# comparison with ET.
|
||||
|
||||
# to be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
dir=exp/decode_tri2m_vtln_diag
|
||||
mkdir -p $dir
|
||||
vtlnmodel=exp/tri2m/final.vtlnmdl
|
||||
lvtlnmodel=exp/tri2m/final.mdl
|
||||
alignmodel=exp/tri2m/final.alimdl
|
||||
mat=exp/tri2f/final.mat
|
||||
lvtln=exp/tri2m/final.lvtln
|
||||
tree=exp/tri2m/tree
|
||||
graphdir=exp/graph_tri2m
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
# Doesn't matter which model we use when making the graph
|
||||
# (only the transitions and structure are used).
|
||||
scripts/mkgraph.sh $tree $vtlnmodel $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
sifeats="ark:splice-feats scp:data/test_${test}.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
# First do SI decoding with alignment model.
|
||||
# Use smaller beam for this, as less critical.
|
||||
gmm-decode-faster --beam=15.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $alignmodel $graphdir/HCLG.fst "$sifeats" ark,t:$dir/test_${test}_pre.tra ark,t:$dir/test_${test}_pre.ali 2> $dir/predecode_${test}.log
|
||||
|
||||
# Comment the two lines below to make this per-utterance.
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}_pre.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alignmodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $alignmodel "$sifeats" ark:- ark:- | \
|
||||
gmm-est-lvtln-trans --verbose=1 $spk2utt_opt $lvtlnmodel $lvtln \
|
||||
"$sifeats" ark:- ark:/dev/null ark,t:$dir/lvtln_${test}.warp ) \
|
||||
2>$dir/lvtln_${test}.log || exit 1;
|
||||
|
||||
cat $dir/lvtln_${test}.warp | awk '{print $1, (0.85+0.01*$2);}' > $dir/${test}.factor
|
||||
|
||||
feats="ark:compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/${test}.factor --config=conf/mfcc.conf scp:data_prep/test_${test}_wav.scp ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}_pre.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alignmodel ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-update-type=diag $spk2utt_opt $vtlnmodel "$feats" ark,o:- ark:$dir/${test}.trans ) 2>$dir/fmllr_${test}.log || exit 1;
|
||||
|
||||
feats="ark:compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/${test}.factor --config=conf/mfcc.conf scp:data_prep/test_${test}_wav.scp ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.trans ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $vtlnmodel $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
|
@ -0,0 +1,80 @@
|
|||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# As decode_tri2m_vtln_diag but per-utterance: uses feature-level VTLN, as opposed to the linear VTLN, when decoding.
|
||||
# Also computing a diagonal fMLLR transform for
|
||||
# comparison with ET.
|
||||
|
||||
# to be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
dir=exp/decode_tri2m_vtln_diag_utt
|
||||
mkdir -p $dir
|
||||
vtlnmodel=exp/tri2m/final.vtlnmdl
|
||||
lvtlnmodel=exp/tri2m/final.mdl
|
||||
alignmodel=exp/tri2m/final.alimdl
|
||||
mat=exp/tri2f/final.mat
|
||||
lvtln=exp/tri2m/final.lvtln
|
||||
tree=exp/tri2m/tree
|
||||
graphdir=exp/graph_tri2m
|
||||
silphones=`cat data/silphones.csl`
|
||||
mincount=100 # for diagonal fMLLR
|
||||
|
||||
# Doesn't matter which model we use when making the graph
|
||||
# (only the transitions and structure are used).
|
||||
scripts/mkgraph.sh $tree $vtlnmodel $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
sifeats="ark:splice-feats scp:data/test_${test}.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
# First do SI decoding with alignment model.
|
||||
# Use smaller beam for this, as less critical.
|
||||
gmm-decode-faster --beam=15.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $alignmodel $graphdir/HCLG.fst "$sifeats" ark,t:$dir/test_${test}_pre.tra ark,t:$dir/test_${test}_pre.ali 2> $dir/predecode_${test}.log
|
||||
|
||||
## The two lines below are commented out to make this per-utterance.
|
||||
#spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
#utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}_pre.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alignmodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $alignmodel "$sifeats" ark:- ark:- | \
|
||||
gmm-est-lvtln-trans --verbose=1 $spk2utt_opt $lvtlnmodel $lvtln \
|
||||
"$sifeats" ark:- ark:/dev/null ark,t:$dir/lvtln_${test}.warp ) \
|
||||
2>$dir/lvtln_${test}.log || exit 1;
|
||||
|
||||
cat $dir/lvtln_${test}.warp | awk '{print $1, (0.85+0.01*$2);}' > $dir/${test}.factor
|
||||
|
||||
# splice-feats must read the VTLN-warped MFCCs from the pipe (ark:-); reading the stored scp here would discard the warp.
|
||||
feats="ark:compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/${test}.factor --config=conf/mfcc.conf scp:data_prep/test_${test}_wav.scp ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}_pre.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alignmodel ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-update-type=diag --fmllr-min-count=$mincount $spk2utt_opt $vtlnmodel "$feats" ark,o:- ark:$dir/${test}.trans ) 2>$dir/fmllr_${test}.log || exit 1;
|
||||
|
||||
feats="ark:compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/${test}.factor --config=conf/mfcc.conf scp:data_prep/test_${test}_wav.scp ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.trans ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $vtlnmodel $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
|
@ -0,0 +1,71 @@
|
|||
# as decode_tri2m but using the feature-level VTLN
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# as opposed to the linear VTLN when decoding.
|
||||
|
||||
# to be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
# Output directory for this decoding run (logs, transcripts, WER files).
|
||||
dir=exp/decode_tri2m_vtln_nofmllr
|
||||
mkdir -p $dir
|
||||
# Models produced by the tri2m training script:
|
||||
# final.vtlnmdl - model used for the final pass on VTLN-warped features
|
||||
# final.mdl - model passed to gmm-est-lvtln-trans for warp estimation
|
||||
# final.alimdl - model used for the first decoding pass and the posteriors
|
||||
# final.lvtln - the set of linear-VTLN transforms
|
||||
vtlnmodel=exp/tri2m/final.vtlnmdl
|
||||
lvtlnmodel=exp/tri2m/final.mdl
|
||||
alignmodel=exp/tri2m/final.alimdl
|
||||
lvtln=exp/tri2m/final.lvtln
|
||||
tree=exp/tri2m/tree
|
||||
graphdir=exp/graph_tri2m
|
||||
silphones=`cat data/silphones.csl`
|
||||
# Feature transform applied after splice-feats in both feature pipelines below.
|
||||
# It was previously never set in this script, so transform-feats was invoked
|
||||
# without its matrix argument.
|
||||
mat=exp/tri2f/final.mat
|
||||
if [ ! -f $mat ]; then
|
||||
echo No input transformation $mat
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Doesn't matter which model we use when making the graph
|
||||
# (only the transitions and structure are used).
|
||||
scripts/mkgraph.sh $tree $vtlnmodel $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
sifeats="ark:splice-feats scp:data/test_${test}.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
# First do SI decoding with alignment model.
|
||||
# Use smaller beam for this, as less critical.
|
||||
gmm-decode-faster --beam=15.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $alignmodel $graphdir/HCLG.fst "$sifeats" ark,t:$dir/test_${test}_pre.tra ark,t:$dir/test_${test}_pre.ali 2> $dir/predecode_${test}.log
|
||||
|
||||
# Comment the two lines below to make this per-utterance.
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
( ali-to-post ark:$dir/test_${test}_pre.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alignmodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $alignmodel "$sifeats" ark:- ark:- | \
|
||||
gmm-est-lvtln-trans --verbose=1 $spk2utt_opt $lvtlnmodel $lvtln \
|
||||
"$sifeats" ark:- ark:/dev/null ark,t:$dir/lvtln_${test}.warp ) \
|
||||
2>$dir/lvtln_${test}.log || exit 1;
|
||||
|
||||
cat $dir/lvtln_${test}.warp | awk '{print $1, (0.85+0.01*$2);}' > $dir/${test}.factor
|
||||
|
||||
feats="ark:compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/${test}.factor --config=conf/mfcc.conf scp:data_prep/test_${test}_wav.scp ark:- | splice-feats scp:data/test_${test}.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.083333 --word-symbol-table=data/words.txt $vtlnmodel $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
|
@ -1,87 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
# To be run from ..
|
||||
|
||||
dir=exp/sgmm
|
||||
srcdir=exp/tri1
|
||||
srcmodel=$srcdir/final.mdl
|
||||
srcgraphs="ark:gunzip -c $srcdir/graphs.fsts.gz|"
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
|
||||
numiters=25 # Total number of iterations
|
||||
|
||||
realign_iters="5 10 15";
|
||||
silphonelist=`cat data/silphones.csl`
|
||||
numsubstates=1500 # Initial #-substates.
|
||||
totsubstates=5000 # Target #-substates.
|
||||
maxiterinc=15 # Last iter to increase #substates on.
|
||||
incsubstates=$[($totsubstates-$numsubstates)/$maxiterinc] # per-iter increment for #substates
|
||||
gselect_opt="--gselect=ark:gunzip -c $dir/gselect.gz|"
|
||||
randprune=0.1
|
||||
mkdir -p $dir
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |"
|
||||
|
||||
cp $srcdir/tree $dir
|
||||
|
||||
echo "aligning all training data"
|
||||
if [ ! -f $dir/0.ali ]; then
|
||||
gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcmodel "$srcgraphs" \
|
||||
"$feats" ark,t:$dir/0.ali 2> $dir/align.0.log || exit 1;
|
||||
fi
|
||||
|
||||
if [ ! -f $dir/0.mdl ]; then
|
||||
echo "you must run init_sgmm.sh before train_sgmm1.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f $dir/gselect.gz ]; then
|
||||
sgmm-gselect $dir/0.mdl "$feats" ark,t:- 2>$dir/gselect.log | gzip -c > $dir/gselect.gz || exit 1;
|
||||
fi
|
||||
|
||||
cp $dir/0.ali $dir/cur.ali || exit 1;
|
||||
|
||||
iter=0
|
||||
while [ $iter -lt $numiters ]; do
|
||||
echo "Pass $iter ... "
|
||||
if echo $realign_iters | grep -w $iter >/dev/null; then
|
||||
echo "Aligning data"
|
||||
sgmm-align-compiled $scale_opts "$gselect_opt" --beam=8 --retry-beam=40 $dir/$iter.mdl \
|
||||
"$srcgraphs" "$feats" \
|
||||
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
|
||||
fi
|
||||
if [ $iter -gt 0 ]; then
|
||||
flags=vMwcS
|
||||
else
|
||||
flags=vwcS
|
||||
fi
|
||||
if [ ! -f $dir/$[$iter+1].mdl ]; then
|
||||
sgmm-acc-stats-ali --update-flags=$flags "$gselect_opt" --rand-prune=$randprune --binary=false $dir/$iter.mdl "$feats" ark:$dir/cur.ali $dir/$iter.acc 2> $dir/acc.$iter.log || exit 1;
|
||||
sgmm-est --update-flags=$flags --split-substates=$numsubstates --write-occs=$dir/$[$iter+1].occs $dir/$iter.mdl $dir/$iter.acc $dir/$[$iter+1].mdl 2> $dir/update.$iter.log || exit 1;
|
||||
fi
|
||||
# rm $dir/$iter.mdl $dir/$iter.acc
|
||||
# rm $dir/$iter.occs
|
||||
if [ $iter -lt $maxiterinc ]; then
|
||||
numsubstates=$[$numsubstates+$incsubstates]
|
||||
fi
|
||||
iter=$[$iter+1];
|
||||
done
|
||||
|
||||
( cd $dir; rm final.mdl final.occs 2>/dev/null; ln -s $iter.mdl final.mdl; ln -s $iter.occs final.occs )
|
|
@ -1,103 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This is SGMM training with speaker vectors.
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
# To be run from ..
|
||||
|
||||
dir=exp/sgmm2
|
||||
srcdir=exp/sgmm
|
||||
gmmtridir=exp/tri1
|
||||
trimodel=$gmmtridir/final.mdl
|
||||
srcgraphs="ark:gunzip -c $gmmtridir/graphs.fsts.gz|"
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
|
||||
numiters=25 # Total number of iterations
|
||||
|
||||
realign_iters="5 10 15";
|
||||
silphonelist=`cat data/silphones.csl`
|
||||
numsubstates=1500 # Initial #-substates.
|
||||
totsubstates=5000 # Target #-substates.
|
||||
maxiterinc=15 # Last iter to increase #substates on.
|
||||
incsubstates=$[($totsubstates-$numsubstates)/$maxiterinc] # per-iter increment for #substates
|
||||
gselect_opt="--gselect=ark:gunzip -c $dir/gselect.gz|"
|
||||
randprune=0.1
|
||||
spkdim=39
|
||||
mkdir -p $dir
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |"
|
||||
|
||||
cp $gmmtridir/tree $srcdir/{0.ali,0.mdl,gselect.gz} $dir
|
||||
|
||||
if [ ! -f $dir/0.ali ]; then
|
||||
echo "aligning all training data"
|
||||
gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $trimodel "$srcgraphs" \
|
||||
"$feats" ark,t:$dir/0.ali 2> $dir/align.0.log || exit 1;
|
||||
fi
|
||||
|
||||
if [ ! -f $dir/0.mdl ]; then
|
||||
echo "you must run init_sgmm.sh before train_sgmm2.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f $dir/gselect.gz ]; then
|
||||
sgmm-gselect $dir/0.mdl "$feats" ark,t:- 2>$dir/gselect.log | gzip -c > $dir/gselect.gz || exit 1;
|
||||
fi
|
||||
|
||||
cp $dir/0.ali $dir/cur.ali || exit 1;
|
||||
|
||||
iter=0
|
||||
while [ $iter -lt $numiters ]; do
|
||||
echo "Pass $iter ... "
|
||||
if [ $iter -gt 0 ]; then
|
||||
if [ $iter -le 5 ]; then # only train phonetic subspace
|
||||
flags=vMwcS
|
||||
elif [ $(( $iter % 2 )) -eq 1 ]; then # odd iterations
|
||||
flags=vMwcS
|
||||
else # even iterations, update N and not M
|
||||
flags=vwcSN
|
||||
fi
|
||||
else
|
||||
flags=vwcS
|
||||
fi
|
||||
|
||||
if [ ! -f $dir/$[$iter+1].mdl ]; then
|
||||
if echo $realign_iters | grep -w $iter >/dev/null; then
|
||||
echo "Aligning data"
|
||||
sgmm-align-compiled $scale_opts "$gselect_opt" --beam=8 --retry-beam=40 $dir/$iter.mdl \
|
||||
"$srcgraphs" "$feats" \
|
||||
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
|
||||
fi
|
||||
sgmm-acc-stats-ali --update-flags=$flags "$gselect_opt" --rand-prune=$randprune --binary=false $dir/$iter.mdl "$feats" ark:$dir/cur.ali $dir/$iter.acc 2> $dir/acc.$iter.log || exit 1;
|
||||
if [ $iter -eq 5 ]; then # increase spk dimension from 0 to 39
|
||||
sgmm-estimate --update-flags=$flags --increase-spk-dim=$spkdim --split-substates=$numsubstates --write-occs=$dir/$[$iter+1].occs $dir/$iter.mdl $dir/$iter.acc $dir/$[$iter+1].mdl 2> $dir/update.$iter.log || exit 1;
|
||||
else
|
||||
sgmm-estimate --update-flags=$flags --split-substates=$numsubstates --write-occs=$dir/$[$iter+1].occs $dir/$iter.mdl $dir/$iter.acc $dir/$[$iter+1].mdl 2> $dir/update.$iter.log || exit 1;
|
||||
fi
|
||||
fi
|
||||
|
||||
rm $dir/$iter.acc # $dir/$iter.mdl
|
||||
# rm $dir/$iter.occs
|
||||
if [ $iter -lt $maxiterinc ]; then
|
||||
numsubstates=$[$numsubstates+$incsubstates]
|
||||
fi
|
||||
iter=$[$iter+1];
|
||||
done
|
||||
|
||||
( cd $dir; rm final.mdl final.occs 2>/dev/null; ln -s $iter.mdl final.mdl; ln -s $iter.occs final.occs )
|
|
@ -14,8 +14,6 @@
|
|||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
|
||||
# To be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
|
|
@ -0,0 +1,209 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# To be run from ..
|
||||
|
||||
# This (tri2m) is as tri2g except based on LDA+MLLT
|
||||
# features from tri2f.
|
||||
# We also start from tri2f for initial alignments.
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
dir=exp/tri2m
|
||||
srcdir=exp/tri2f
|
||||
srcmodel=$srcdir/final.mdl
|
||||
srcgraphs="ark:gunzip -c $srcdir/graphs.fsts.gz|"
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
numiters=30 # Number of iterations of training
|
||||
maxiterinc=20 # Last iter to increase #Gauss on.
|
||||
numleaves=1800
|
||||
numgauss=$numleaves
|
||||
totgauss=9000 # Target #Gaussians
|
||||
incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
|
||||
silphonelist=`cat data/silphones.csl`
|
||||
realign_iters="10 15 20 25";
|
||||
lvtln_iters="2 4 6 8 12"; # Recompute LVTLN transforms on these iters.
|
||||
mat=exp/tri2f/final.mat
|
||||
if [ ! -f $mat ]; then
|
||||
echo No input transformation $mat
|
||||
exit 1
|
||||
fi
|
||||
per_spk=true
|
||||
compute_vtlnmdl=true # If true, at the end compute a model with actual feature-space
|
||||
# VTLN features. You can decode with this as an alternative to
|
||||
# final.mdl which takes the LVTLN features.
|
||||
|
||||
numfiles=40 # Number of feature files for computing LVTLN transforms.
|
||||
numclass=31; # Can't really change this without changing the script below
|
||||
defaultclass=15; # Corresponds to no warping.
|
||||
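# Class c corresponds to VTLN warp factor 0.85 + 0.01*c (see the loop that initializes the transforms below), so class 15 is a warp factor of 1.0.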
|
||||
|
||||
|
||||
if [ $per_spk == "true" ]; then
|
||||
spk2utt_opt=--spk2utt=ark:data/train.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/train.utt2spk
|
||||
else
|
||||
spk2utt_opt=
|
||||
utt2spk_opt=
|
||||
fi
|
||||
|
||||
mkdir -p $dir
|
||||
cp $srcdir/topo $dir
|
||||
|
||||
|
||||
srcfeats="ark:splice-feats --print-args=false scp:data/train.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
# Will create $dir/cur.trans (the per-speaker LVTLN transforms read by "feats") below...
|
||||
feats="ark:splice-feats --print-args=false scp:data/train.scp ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/cur.trans ark:- ark:- |"
|
||||
|
||||
gmm-init-lvtln --dim=40 --num-classes=$numclass --default-class=$defaultclass \
|
||||
$dir/0.lvtln 2>$dir/init_lvtln.log || exit 1
|
||||
|
||||
featsub="ark:scripts/subset_scp.pl $numfiles data/train.scp | splice-feats scp:- ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
echo "Initializing lvtln transforms."
|
||||
c=0
|
||||
while [ $c -lt $numclass ]; do
|
||||
warp=`perl -e 'print 0.85 + 0.01*$ARGV[0];' $c`
|
||||
featsub_warp="ark:scripts/subset_scp.pl $numfiles data_prep/train_wav.scp | compute-mfcc-feats --vtln-low=100 --vtln-high=-600 --vtln-warp=$warp --config=conf/mfcc.conf scp:- ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- |"
|
||||
gmm-train-lvtln-special --normalize-var=true $c $dir/0.lvtln $dir/0.lvtln \
|
||||
"$featsub" "$featsub_warp" 2> $dir/train_special.$c.log || exit 1;
|
||||
c=$[$c+1]
|
||||
done
|
||||
|
||||
|
||||
|
||||
# just a single element. :-separated integer list of context-independent
|
||||
scripts/make_roots.pl --separate data/phones.txt $silphonelist shared split > $dir/roots.txt 2>$dir/roots.log || exit 1;
|
||||
# script below tells it not to cluster, but here we avoid accumulating
|
||||
# CD-stats for silence.
|
||||
|
||||
echo "aligning all training data"
|
||||
gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcmodel \
|
||||
"$srcgraphs" "$srcfeats" ark,t:$dir/0.ali 2> $dir/align.0.log || exit 1;
|
||||
|
||||
|
||||
echo "Computing LVTLN transforms (iter 0)"
|
||||
( ali-to-post ark:$dir/0.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $srcmodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $srcmodel "$srcfeats" ark:- ark:- | \
|
||||
gmm-est-lvtln-trans --verbose=1 $spk2utt_opt $srcmodel $dir/0.lvtln \
|
||||
"$srcfeats" ark:- ark:$dir/cur.trans ark,t:$dir/0.warp ) 2>$dir/lvtln.0.log || exit 1
|
||||
|
||||
acc-tree-stats --ci-phones=$silphonelist $srcmodel "$feats" ark:$dir/0.ali $dir/treeacc 2> $dir/acc.tree.log || exit 1;
|
||||
|
||||
|
||||
cat data/phones.txt | awk '{print $NF}' | grep -v -w 0 > $dir/phones.list
|
||||
cluster-phones $dir/treeacc $dir/phones.list $dir/questions.txt 2> $dir/questions.log || exit 1;
|
||||
scripts/int2sym.pl data/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
|
||||
compile-questions $dir/topo $dir/questions.txt $dir/questions.qst 2>$dir/compile_questions.log || exit 1;
|
||||
|
||||
scripts/make_roots.pl --separate data/phones.txt $silphonelist shared split > $dir/roots.txt 2>$dir/roots.log || exit 1;
|
||||
|
||||
build-tree --verbose=1 --max-leaves=$numleaves \
|
||||
$dir/treeacc $dir/roots.txt \
|
||||
$dir/questions.qst $dir/topo $dir/tree 2> $dir/train_tree.log || exit 1;
|
||||
|
||||
gmm-init-model --write-occs=$dir/1.occs \
|
||||
$dir/tree $dir/treeacc $dir/topo $dir/1.mdl 2> $dir/init_model.log || exit 1;
|
||||
|
||||
rm $dir/treeacc
|
||||
|
||||
# Convert alignments generated from the source model ($srcmodel), to use as initial alignments.
|
||||
|
||||
convert-ali $srcmodel $dir/1.mdl $dir/tree ark:$dir/0.ali ark:$dir/cur.ali 2>$dir/convert.log
|
||||
# Debug step only: convert back and check they're the same.
|
||||
convert-ali $dir/1.mdl $srcmodel $srcdir/tree ark:$dir/cur.ali ark,t:- \
|
||||
2>/dev/null | cmp - $dir/0.ali || exit 1;
|
||||
|
||||
rm $dir/0.ali
|
||||
|
||||
|
||||
# Make training graphs
|
||||
echo "Compiling training graphs"
|
||||
compile-train-graphs $dir/tree $dir/1.mdl data/L.fst ark:data/train.tra \
|
||||
"ark:|gzip -c > $dir/graphs.fsts.gz" 2>$dir/compile_graphs.log || exit 1
|
||||
|
||||
cur_lvtln=$dir/0.lvtln
|
||||
x=1
|
||||
while [ $x -lt $numiters ]; do
|
||||
echo pass $x
|
||||
if echo $lvtln_iters | grep -w $x >/dev/null; then
|
||||
( ali-to-post ark:$dir/cur.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- | \
|
||||
gmm-post-to-gpost $dir/$x.mdl "$feats" ark:- ark:- | \
|
||||
gmm-est-lvtln-trans --verbose=1 $spk2utt_opt $dir/$x.mdl $dir/0.lvtln \
|
||||
"$srcfeats" ark:- ark:$dir/tmp.trans ark,t:$dir/$x.warp ) 2>$dir/lvtln.$x.log || exit 1
|
||||
cp $dir/$x.warp $dir/cur.warp
|
||||
mv $dir/tmp.trans $dir/cur.trans
|
||||
fi
|
||||
if echo $realign_iters | grep -w $x >/dev/null; then
|
||||
echo "Aligning data"
|
||||
gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/$x.mdl \
|
||||
"ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
|
||||
ark:$dir/cur.ali 2> $dir/align.$x.log || exit 1;
|
||||
fi
|
||||
gmm-acc-stats-ali --binary=false $dir/$x.mdl "$feats" ark:$dir/cur.ali $dir/$x.acc 2> $dir/acc.$x.log || exit 1;
|
||||
gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl $dir/$x.acc $dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1;
|
||||
rm $dir/$x.mdl $dir/$x.acc
|
||||
if [ $x -le $maxiterinc ]; then
|
||||
numgauss=$[$numgauss+$incgauss];
|
||||
fi
|
||||
x=$[$x+1]
|
||||
done
|
||||
|
||||
# Accumulate stats for "alignment model" which is as the model but with
|
||||
# the baseline features (shares Gaussian-level alignments).
|
||||
( ali-to-post ark:$dir/cur.ali ark:- | \
|
||||
gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$srcfeats" ark:- $dir/$x.acc2 ) 2>$dir/acc_alimdl.log || exit 1;
|
||||
# Update model.
|
||||
gmm-est --remove-low-count-gaussians=false $dir/$x.mdl $dir/$x.acc2 $dir/$x.alimdl \
|
||||
2>$dir/est_alimdl.log || exit 1;
|
||||
rm $dir/$x.acc2
|
||||
|
||||
|
||||
# The following files contain information that may be useful for display purposes
|
||||
|
||||
for n in 0 $lvtln_iters; do
|
||||
cat $dir/$n.warp | scripts/process_warps.pl data/spk2gender.map > $dir/warps.$n
|
||||
done
|
||||
|
||||
# Optionally build a model on real feature-space VTLN features, so that it can
|
||||
# be used for decoding as an alternative to final.mdl (which takes LVTLN features).
|
||||
if [ $compute_vtlnmdl == "true" ]; then
|
||||
# Map each warp class index to a warp factor: 0.85 + 0.01*class.
|
||||
cat $dir/cur.warp | awk '{print $1, (0.85+0.01*$2);}' > $dir/cur.factor
|
||||
# Stop here if feature extraction fails, rather than training on a partial tmp.ark.
|
||||
compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/cur.factor --config=conf/mfcc.conf scp:data_prep/train_wav.scp ark:$dir/tmp.ark 2>$dir/mfcc.log || exit 1;
|
||||
vtlnfeats="ark:splice-feats ark:$dir/tmp.ark ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
# Compute diagonal fMLLR transform to normalize VTLN feats.
|
||||
( ali-to-post ark:$dir/cur.ali ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-update-type=diag $spk2utt_opt $dir/$x.mdl "$vtlnfeats" ark,o:- ark:$dir/vtln.trans ) 2>$dir/vtln_fmllr.log || exit 1;
|
||||
|
||||
||||
# From here on the VTLN features also have the diagonal transform applied.
|
||||
vtlnfeats="ark:splice-feats ark:$dir/tmp.ark ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/vtln.trans ark:- ark:- |"
|
||||
|
||||
||||
# Accumulate stats from the alignments, given both the LVTLN and the VTLN features.
|
||||
( ali-to-post ark:$dir/cur.ali ark:- | \
|
||||
gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$vtlnfeats" ark:- $dir/$x.acc3 ) 2>$dir/acc_vtlnmdl.log || exit 1;
|
||||
# Update model.
|
||||
gmm-est $dir/$x.mdl $dir/$x.acc3 $dir/$x.vtlnmdl \
|
||||
2>$dir/est_vtlnmdl.log || exit 1;
|
||||
rm $dir/$x.acc3
|
||||
# -f replaces a link left by a previous run; plain "ln -s" fails if it exists.
|
||||
ln -sf $x.vtlnmdl $dir/final.vtlnmdl
|
||||
rm $dir/tmp.ark
|
||||
fi
|
||||
|
||||
|
||||
# Point the final.* names at the outputs of the last iteration. All four links
|
||||
# are removed first: "ln -s" fails when the link name already exists, so on a
|
||||
# re-run the links other than final.mdl would otherwise be left unchanged.
|
||||
( cd $dir; rm -f final.mdl final.alimdl final.lvtln final.trans;
|
||||
ln -s $x.mdl final.mdl;
|
||||
ln -s $x.alimdl final.alimdl;
|
||||
ln -s 0.lvtln final.lvtln;
|
||||
ln -s cur.trans final.trans )
|
|
@ -28,11 +28,11 @@ The "tri3*" systems are trained on all the SI-284 data.
|
|||
LM: | Pruned trigram | Bigram |
|
||||
Test set: | Eval92 | Eval93 | Eval92 | Eval93 |
|
||||
system:
|
||||
mono 31.4
|
||||
tri1 13.3
|
||||
mono 31.4 37.5
|
||||
tri1 13.3 18.1
|
||||
tri2a 12.5 18.3 14.3 21.0 | tri2a is delta+delta-deltas.
|
||||
+fmllr[spk] 11.4
|
||||
[utt] 12.5
|
||||
+fmllr[spk] 11.4 15.5
|
||||
[utt] 12.5 18.4
|
||||
tri3a 10.7 13.8 11.9 15.0 | tri3a is as tri2a but all SI-284 data.
|
||||
+fmllr[spk] 9.5 12.1
|
||||
[diagonal] 10.5 12.7
|
||||
|
@ -42,7 +42,8 @@ system:
|
|||
+fmllr[spk] 10.5 13.9
|
||||
[utt] 11.3 15.1 | [estimating ET per utt.]
|
||||
+fmllr[utt] 11.2 15.3 | [estimating ET and fMLLR per utt]
|
||||
tri2c 12.7 17.0 | as tri2a plus cepstral mean subtraction.
|
||||
tri2c 12.7 16.6 | as tri2a plus cepstral mean subtraction.
|
||||
[utt] 13.0 17.0 | [per utterance CMS in test]
|
||||
tri2d 13.0 19.4 | as tri2a plus STC/MLLT (worse).
|
||||
tri2e 14.3 19.1 | as tri2a but splice+LDA features (worse).
|
||||
tri2f 12.2 17.7 | as tri2e plus STC/MLLT (better than tri2a).
|
||||
|
@ -51,15 +52,21 @@ system:
|
|||
+diag[spk] 10.7 16.5 | + diagonal, not just mean-only fMLLR
|
||||
+diag[utt] 11.1 16.1 | [all per-utt]
|
||||
+vtln,diag 10.9 15.9 | actual VTLN, plus diag-FMLLR
|
||||
[utt] 10.9 16.2 | [all per-utt]
|
||||
[utt] 10.9 16.1 | [all per-utt]
|
||||
tri2h 13.4 20.2 | [ splice-9-frames + HLDA... worse than tri2a]
|
||||
tri2i 12.4 18.4 | [ triple-deltas + HLDA... same as tri2a]
|
||||
tri2j 12.8 18.3 | [ triple-deltas+LDA+MLLT... slightly worse than tri2a]
|
||||
tri2k 10.6 15.0 | [ splice-9-frames + LDA + ET ]
|
||||
[utt] 10.8 15.1 | [adaptation per utterance]
|
||||
[spk,+fmllr] 9.9 14.2 | [per speaker, plus fMLLR]
|
||||
tri2k 10.3 15.0 | [ splice-9-frames + LDA + ET ]
|
||||
[utt] 10.3 15.2 | [adaptation per utterance]
|
||||
[spk,+fmllr] 9.9 14.4 | [per speaker, plus fMLLR]
|
||||
tri2l 9.6 13.7 | train with SAT; test with fMLLR
|
||||
[utt] 12.0 16.8 | [adaptation per utterance]
|
||||
tri2m 10.8 15.0 | [LDA + MLLT + Linear VTLN]
|
||||
[utt] 11.0 14.6 | [per-utt, not per-spk]
|
||||
[diag] 10.7 14.6 | [diagonal, not just offset, CMLLR component]
|
||||
[diag;utt] 10.8 14.5 | [per-utterance]
|
||||
[vtln;diag] 10.7 14.9 | [feature-level VTLN; diagonal CMLLR]
|
||||
[utt] 10.6 14.4 | [per-utterance]
|
||||
sgmm2a 10.4 16.4 | [sgmm, unadapted, on delta features]
|
||||
sgmm2b 10.1 14.1 | [sgmm, spk-vector adaptation only]
|
||||
[utt] 10.2 13.7 | [adapt per utt]
|
||||
|
@ -75,6 +82,83 @@ system:
|
|||
[fmllr] 7.7 9.7 | [per-spk, with fMLLR]
|
||||
|
||||
# Raw results:
|
||||
exp/decode_mono_tgpr_eval92/wer:%WER 31.38 [ 1770 / 5641, 108 ins, 386 del, 1276 sub ]
|
||||
exp/decode_mono_tgpr_eval93/wer:%WER 37.54 [ 1291 / 3439, 52 ins, 385 del, 854 sub ]
|
||||
exp/decode_tri1_tgpr_eval92/wer:%WER 13.30 [ 750 / 5641, 133 ins, 74 del, 543 sub ]
|
||||
exp/decode_tri1_tgpr_eval93/wer:%WER 18.14 [ 624 / 3439, 54 ins, 94 del, 476 sub ]
|
||||
exp/decode_tri2a_tgpr_eval92/wer:%WER 12.52 [ 706 / 5641, 127 ins, 60 del, 519 sub ]
|
||||
exp/decode_tri2a_tgpr_eval93/wer:%WER 18.29 [ 629 / 3439, 47 ins, 104 del, 478 sub ]
|
||||
exp/decode_tri2a_tgpr_fmllr_eval92/wer:%WER 11.42 [ 644 / 5641, 116 ins, 60 del, 468 sub ]
|
||||
exp/decode_tri2a_tgpr_fmllr_utt_eval92/wer:%WER 12.48 [ 704 / 5641, 128 ins, 56 del, 520 sub ]
|
||||
exp/decode_tri2c_tgpr_eval92/wer:%WER 12.71 [ 717 / 5641, 137 ins, 72 del, 508 sub ]
|
||||
exp/decode_tri2c_tgpr_eval93/wer:%WER 16.57 [ 570 / 3439, 62 ins, 87 del, 421 sub ]
|
||||
exp/decode_tri2c_tgpr_utt_eval92/wer:%WER 12.96 [ 731 / 5641, 148 ins, 67 del, 516 sub ]
|
||||
exp/decode_tri2c_tgpr_utt_eval93/wer:%WER 17.01 [ 585 / 3439, 61 ins, 85 del, 439 sub ]
|
||||
exp/decode_tri2d_tgpr_eval92/wer:%WER 13.03 [ 735 / 5641, 138 ins, 74 del, 523 sub ]
|
||||
exp/decode_tri2d_tgpr_eval93/wer:%WER 19.40 [ 667 / 3439, 48 ins, 130 del, 489 sub ]
|
||||
exp/decode_tri2e_tgpr_eval92/wer:%WER 14.29 [ 806 / 5641, 155 ins, 79 del, 572 sub ]
|
||||
exp/decode_tri2e_tgpr_eval93/wer:%WER 19.08 [ 656 / 3439, 71 ins, 120 del, 465 sub ]
|
||||
exp/decode_tri2f_tgpr_eval92/wer:%WER 12.23 [ 690 / 5641, 138 ins, 57 del, 495 sub ]
|
||||
exp/decode_tri2f_tgpr_eval93/wer:%WER 17.74 [ 610 / 3439, 68 ins, 85 del, 457 sub ]
|
||||
exp/decode_tri2g_tgpr_diag_eval92/wer:%WER 10.65 [ 601 / 5641, 111 ins, 55 del, 435 sub ]
|
||||
exp/decode_tri2g_tgpr_diag_eval93/wer:%WER 16.49 [ 567 / 3439, 77 ins, 72 del, 418 sub ]
|
||||
exp/decode_tri2g_tgpr_eval92/wer:%WER 11.08 [ 625 / 5641, 119 ins, 57 del, 449 sub ]
|
||||
exp/decode_tri2g_tgpr_eval93/wer:%WER 16.40 [ 564 / 3439, 72 ins, 68 del, 424 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_diag_eval92/wer:%WER 11.10 [ 626 / 5641, 119 ins, 60 del, 447 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_diag_eval93/wer:%WER 16.08 [ 553 / 3439, 75 ins, 68 del, 410 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_eval92/wer:%WER 11.19 [ 631 / 5641, 117 ins, 59 del, 455 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_eval93/wer:%WER 16.17 [ 556 / 3439, 76 ins, 67 del, 413 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_vtln_diag_eval92/wer:%WER 10.87 [ 613 / 5641, 114 ins, 59 del, 440 sub ]
|
||||
exp/decode_tri2g_tgpr_utt_vtln_diag_eval93/wer:%WER 16.14 [ 555 / 3439, 77 ins, 67 del, 411 sub ]
|
||||
exp/decode_tri2g_tgpr_vtln_diag_eval92/wer:%WER 10.88 [ 614 / 5641, 117 ins, 59 del, 438 sub ]
|
||||
exp/decode_tri2g_tgpr_vtln_diag_eval93/wer:%WER 15.91 [ 547 / 3439, 73 ins, 68 del, 406 sub ]
|
||||
exp/decode_tri2h_tgpr_eval92/wer:%WER 13.40 [ 756 / 5641, 163 ins, 54 del, 539 sub ]
|
||||
exp/decode_tri2h_tgpr_eval93/wer:%WER 20.24 [ 696 / 3439, 69 ins, 109 del, 518 sub ]
|
||||
exp/decode_tri2i_tgpr_eval92/wer:%WER 12.39 [ 699 / 5641, 130 ins, 72 del, 497 sub ]
|
||||
exp/decode_tri2i_tgpr_eval93/wer:%WER 18.35 [ 631 / 3439, 58 ins, 102 del, 471 sub ]
|
||||
exp/decode_tri2j_tgpr_eval92/wer:%WER 12.82 [ 723 / 5641, 127 ins, 70 del, 526 sub ]
|
||||
exp/decode_tri2j_tgpr_eval93/wer:%WER 18.26 [ 628 / 3439, 59 ins, 99 del, 470 sub ]
|
||||
exp/decode_tri2k_tgpr_eval92/wer:%WER 10.26 [ 579 / 5641, 117 ins, 45 del, 417 sub ]
|
||||
exp/decode_tri2k_tgpr_eval93/wer:%WER 15.03 [ 517 / 3439, 73 ins, 71 del, 373 sub ]
|
||||
exp/decode_tri2k_tgpr_fmllr_eval92/wer:%WER 9.86 [ 556 / 5641, 119 ins, 49 del, 388 sub ]
|
||||
exp/decode_tri2k_tgpr_fmllr_eval93/wer:%WER 14.39 [ 495 / 3439, 72 ins, 67 del, 356 sub ]
|
||||
exp/decode_tri2k_tgpr_utt_eval92/wer:%WER 10.30 [ 581 / 5641, 117 ins, 47 del, 417 sub ]
|
||||
exp/decode_tri2k_tgpr_utt_eval93/wer:%WER 15.18 [ 522 / 3439, 76 ins, 69 del, 377 sub ]
|
||||
exp/decode_tri2l_tgpr_eval92/wer:%WER 9.64 [ 544 / 5641, 121 ins, 44 del, 379 sub ]
|
||||
exp/decode_tri2l_tgpr_eval93/wer:%WER 13.72 [ 472 / 3439, 68 ins, 66 del, 338 sub ]
|
||||
exp/decode_tri2l_tgpr_utt_eval92/wer:%WER 12.00 [ 677 / 5641, 141 ins, 60 del, 476 sub ]
|
||||
exp/decode_tri2l_tgpr_utt_eval93/wer:%WER 16.75 [ 576 / 3439, 59 ins, 93 del, 424 sub ]
|
||||
exp/decode_tri2m_tgpr_diag_eval92/wer:%WER 10.67 [ 602 / 5641, 125 ins, 52 del, 425 sub ]
|
||||
exp/decode_tri2m_tgpr_diag_eval93/wer:%WER 14.57 [ 501 / 3439, 67 ins, 64 del, 370 sub ]
|
||||
exp/decode_tri2m_tgpr_eval92/wer:%WER 10.81 [ 610 / 5641, 126 ins, 51 del, 433 sub ]
|
||||
exp/decode_tri2m_tgpr_eval93/wer:%WER 15.00 [ 516 / 3439, 66 ins, 66 del, 384 sub ]
|
||||
exp/decode_tri2m_tgpr_utt_diag_eval92/wer:%WER 10.83 [ 611 / 5641, 118 ins, 55 del, 438 sub ]
|
||||
exp/decode_tri2m_tgpr_utt_diag_eval93/wer:%WER 14.45 [ 497 / 3439, 62 ins, 69 del, 366 sub ]
|
||||
exp/decode_tri2m_tgpr_utt_eval92/wer:%WER 11.01 [ 621 / 5641, 125 ins, 53 del, 443 sub ]
|
||||
exp/decode_tri2m_tgpr_utt_eval93/wer:%WER 14.63 [ 503 / 3439, 65 ins, 67 del, 371 sub ]
|
||||
exp/decode_tri2m_tgpr_utt_vtln_diag_eval92/wer:%WER 10.64 [ 600 / 5641, 123 ins, 51 del, 426 sub ]
|
||||
exp/decode_tri2m_tgpr_utt_vtln_diag_eval93/wer:%WER 14.39 [ 495 / 3439, 60 ins, 70 del, 365 sub ]
|
||||
exp/decode_tri2m_tgpr_vtln_diag_eval92/wer:%WER 10.74 [ 606 / 5641, 125 ins, 52 del, 429 sub ]
|
||||
exp/decode_tri2m_tgpr_vtln_diag_eval93/wer:%WER 14.89 [ 512 / 3439, 68 ins, 67 del, 377 sub ]
|
||||
exp/decode_tri3a_tgpr_dfmllr_eval92/wer:%WER 10.51 [ 593 / 5641, 111 ins, 51 del, 431 sub ]
|
||||
exp/decode_tri3a_tgpr_dfmllr_eval93/wer:%WER 12.68 [ 436 / 3439, 52 ins, 52 del, 332 sub ]
|
||||
exp/decode_tri3a_tgpr_eval92/wer:%WER 10.67 [ 602 / 5641, 131 ins, 43 del, 428 sub ]
|
||||
exp/decode_tri3a_tgpr_eval93/wer:%WER 13.84 [ 476 / 3439, 55 ins, 68 del, 353 sub ]
|
||||
exp/decode_tri3a_tgpr_fmllr_eval92/wer:%WER 9.54 [ 538 / 5641, 114 ins, 47 del, 377 sub ]
|
||||
exp/decode_tri3a_tgpr_fmllr_eval93/wer:%WER 12.13 [ 417 / 3439, 52 ins, 59 del, 306 sub ]
|
||||
exp/decode_tri3a_tgpr_uttdfmllr_eval92/wer:%WER 10.58 [ 597 / 5641, 118 ins, 49 del, 430 sub ]
|
||||
exp/decode_tri3a_tgpr_uttdfmllr_eval93/wer:%WER 13.29 [ 457 / 3439, 49 ins, 57 del, 351 sub ]
|
||||
exp/decode_tri3a_tgpr_uttfmllr_eval92/wer:%WER 10.44 [ 589 / 5641, 122 ins, 47 del, 420 sub ]
|
||||
exp/decode_tri3a_tgpr_uttfmllr_eval93/wer:%WER 13.93 [ 479 / 3439, 56 ins, 69 del, 354 sub ]
|
||||
|
||||
exp/decode_sgmm2a_tgpr_eval92/wer:%WER 10.44 [ 589 / 5641, 129 ins, 38 del, 422 sub ]
|
||||
exp/decode_sgmm2a_tgpr_eval93/wer:%WER 16.40 [ 564 / 3439, 68 ins, 92 del, 404 sub ]
|
||||
|
||||
|
||||
|
||||
# [old:]
|
||||
|
||||
|
||||
exp/decode_mono_tgpr_eval92/wer:%WER 31.38 [ 1770 / 5641, 108 ins, 386 del, 1276 sub ]
|
||||
exp/decode_tri1_tgpr_eval92/wer:%WER 13.30 [ 750 / 5641, 133 ins, 74 del, 543 sub ]
|
||||
exp/decode_tri2a_bg_eval92/wer:%WER 14.25 [ 804 / 5641, 146 ins, 87 del, 571 sub ]
|
||||
|
|
|
@ -172,7 +172,6 @@ dir=[some directory to put MFCCs]
|
|||
steps/make_mfcc_train.sh $dir
|
||||
steps/make_mfcc_test.sh $dir
|
||||
|
||||
|
||||
# (5) running the training and testing steps..
|
||||
|
||||
steps/train_mono.sh || exit 1;
|
||||
|
@ -185,12 +184,16 @@ steps/train_mono.sh || exit 1;
|
|||
# you'd have to modify the script to use that.
|
||||
|
||||
(scripts/mkgraph.sh --mono data/G_tg_pruned.fst exp/mono/tree exp/mono/final.mdl exp/graph_mono_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_mono_tgpr_eval92 exp/graph_mono_tg_pruned/HCLG.fst steps/decode_mono.sh data/eval_nov92.scp ) &
|
||||
scripts/decode.sh exp/decode_mono_tgpr_eval92 exp/graph_mono_tg_pruned/HCLG.fst steps/decode_mono.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_mono_tgpr_eval93 exp/graph_mono_tg_pruned/HCLG.fst steps/decode_mono.sh data/eval_nov93.scp
|
||||
) &
|
||||
|
||||
steps/train_tri1.sh || exit 1;
|
||||
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri1/tree exp/tri1/final.mdl exp/graph_tri1_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_tri1_tgpr_eval92 exp/graph_tri1_tg_pruned/HCLG.fst steps/decode_tri1.sh data/eval_nov92.scp ) &
|
||||
scripts/decode.sh exp/decode_tri1_tgpr_eval92 exp/graph_tri1_tg_pruned/HCLG.fst steps/decode_tri1.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri1_tgpr_eval93 exp/graph_tri1_tg_pruned/HCLG.fst steps/decode_tri1.sh data/eval_nov93.scp
|
||||
) &
|
||||
|
||||
steps/train_tri2a.sh || exit 1;
|
||||
|
||||
|
@ -201,13 +204,12 @@ steps/train_tri2a.sh || exit 1;
|
|||
# also doing tri2a with bigram
|
||||
(
|
||||
scripts/mkgraph.sh data/G_bg.fst exp/tri2a/tree exp/tri2a/final.mdl exp/graph_tri2a_bg || exit 1;
|
||||
scripts/decode.sh exp/decode_tri2a_bg_eval92 exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2a_bg_eval93 exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov93.scp
|
||||
)&
|
||||
|
||||
|
||||
( scripts/decode.sh exp/decode_tri2a_tgpr_fmllr_utt_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2a_tgpr_fmllr_eval92 exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov92.scp )&
|
||||
for year in 92 93; do
|
||||
scripts/decode.sh exp/decode_tri2a_bg_eval${year} exp/graph_tri2a_bg/HCLG.fst steps/decode_tri2a.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh exp/decode_tri2a_tgpr_fmllr_utt_eval${year} exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2a_tgpr_fmllr_eval${year} exp/graph_tri2a_tg_pruned/HCLG.fst steps/decode_tri2a_fmllr.sh data/eval_nov${year}.scp
|
||||
done
|
||||
)&
|
||||
|
||||
steps/train_tri3a.sh || exit 1;
|
||||
|
||||
|
@ -233,32 +235,28 @@ done
|
|||
)&
|
||||
|
||||
|
||||
# will delete:
|
||||
## scripts/decode_queue_fmllr.sh exp/graph_tri3a_tg_pruned exp/tri3a/final.mdl exp/decode_tri3a_tg_pruned_fmllr &
|
||||
|
||||
#### Now alternative experiments... ###
|
||||
|
||||
# Exponential Transform (ET)
|
||||
steps/train_tri2b.sh
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2b/tree exp/tri2b/final.mdl exp/graph_tri2b_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_tri2b_tgpr_utt_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval92 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov92.scp
|
||||
|
||||
scripts/decode.sh exp/decode_tri2b_tgpr_utt_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov93.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov93.scp
|
||||
scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov93.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval93 exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov93.scp
|
||||
for year in 92 93; do
|
||||
scripts/decode.sh exp/decode_tri2b_tgpr_utt_eval${year} exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_eval${year} exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh exp/decode_tri2b_tgpr_utt_fmllr_eval${year} exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2b_tgpr_fmllr_eval${year} exp/graph_tri2b_tg_pruned/HCLG.fst steps/decode_tri2b_fmllr.sh data/eval_nov${year}.scp
|
||||
done
|
||||
|
||||
) &
|
||||
|
||||
# Cepstral Mean Normalization (CMN)
|
||||
|
||||
steps/train_tri2c.sh
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2c/tree exp/tri2c/final.mdl exp/graph_tri2c_tg_pruned || exit 1;
|
||||
scripts/decode.sh exp/decode_tri2c_tgpr_utt_eval92 exp/graph_tri2c_tg_pruned/HCLG.fst steps/decode_tri2c.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2c_tgpr_eval92 exp/graph_tri2c_tg_pruned/HCLG.fst steps/decode_tri2c.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_tri2c_tgpr_eval93 exp/graph_tri2c_tg_pruned/HCLG.fst steps/decode_tri2c.sh data/eval_nov93.scp )&
|
||||
scripts/decode.sh exp/decode_tri2c_tgpr_utt_eval93 exp/graph_tri2c_tg_pruned/HCLG.fst steps/decode_tri2c.sh data/eval_nov93.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2c_tgpr_eval93 exp/graph_tri2c_tg_pruned/HCLG.fst steps/decode_tri2c.sh data/eval_nov93.scp
|
||||
)&
|
||||
|
||||
|
||||
# MLLT/STC
|
||||
|
@ -351,6 +349,28 @@ steps/train_tri2l.sh
|
|||
)&
|
||||
|
||||
|
||||
|
||||
# LDA + MLLT + Linear VTLN (+ regular VTLN)
|
||||
# Note: this depends on tri2f.
|
||||
steps/train_tri2m.sh
|
||||
(
|
||||
scripts/mkgraph.sh data/G_tg_pruned.fst exp/tri2m/tree exp/tri2m/final.mdl exp/graph_tri2m_tg_pruned || exit 1;
|
||||
|
||||
for year in 92 93; do
|
||||
scripts/decode.sh exp/decode_tri2m_tgpr_utt_eval${year} exp/graph_tri2m_tg_pruned/HCLG.fst steps/decode_tri2m.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh exp/decode_tri2m_tgpr_utt_diag_eval${year} exp/graph_tri2m_tg_pruned/HCLG.fst steps/decode_tri2m_diag.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --wav exp/decode_tri2m_tgpr_utt_vtln_diag_eval${year} exp/graph_tri2m_tg_pruned/HCLG.fst steps/decode_tri2m_vtln_diag.sh data/eval_nov${year}.scp
|
||||
|
||||
scripts/decode.sh --per-spk exp/decode_tri2m_tgpr_eval${year} exp/graph_tri2m_tg_pruned/HCLG.fst steps/decode_tri2m.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --per-spk exp/decode_tri2m_tgpr_diag_eval${year} exp/graph_tri2m_tg_pruned/HCLG.fst steps/decode_tri2m_diag.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --wav --per-spk exp/decode_tri2m_tgpr_vtln_diag_eval${year} exp/graph_tri2m_tg_pruned/HCLG.fst steps/decode_tri2m_vtln_diag.sh data/eval_nov${year}.scp
|
||||
done
|
||||
|
||||
)&
|
||||
|
||||
|
||||
train_ubm2a.sh || exit 1;
|
||||
|
||||
# Deltas + SGMM
|
||||
steps/train_sgmm2a.sh || exit 1;
|
||||
|
||||
|
@ -362,36 +382,37 @@ steps/train_sgmm2a.sh || exit 1;
|
|||
steps/train_sgmm2b.sh || exit 1;
|
||||
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/sgmm2b/tree exp/sgmm2b/final.mdl exp/graph_sgmm2b_tg_pruned || exit 1;
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm2b_tgpr_eval92 exp/graph_sgmm2b_tg_pruned/HCLG.fst steps/decode_sgmm2b.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm2b_tgpr_eval93 exp/graph_sgmm2b_tg_pruned/HCLG.fst steps/decode_sgmm2b.sh data/eval_nov93.scp
|
||||
scripts/decode.sh exp/decode_sgmm2b_tgpr_utt_eval92 exp/graph_sgmm2b_tg_pruned/HCLG.fst steps/decode_sgmm2b.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_sgmm2b_tgpr_utt_eval93 exp/graph_sgmm2b_tg_pruned/HCLG.fst steps/decode_sgmm2b.sh data/eval_nov93.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm2b_fmllr_tgpr_eval92 exp/graph_sgmm2b_tg_pruned/HCLG.fst steps/decode_sgmm2b_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm2b_fmllr_tgpr_eval93 exp/graph_sgmm2b_tg_pruned/HCLG.fst steps/decode_sgmm2b_fmllr.sh data/eval_nov93.scp )&
|
||||
for year in 92 93; do
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm2b_tgpr_eval${year} exp/graph_sgmm2b_tg_pruned/HCLG.fst steps/decode_sgmm2b.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh exp/decode_sgmm2b_tgpr_utt_eval${year} exp/graph_sgmm2b_tg_pruned/HCLG.fst steps/decode_sgmm2b.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm2b_fmllr_tgpr_eval${year} exp/graph_sgmm2b_tg_pruned/HCLG.fst steps/decode_sgmm2b_fmllr.sh data/eval_nov${year}.scp
|
||||
done
|
||||
)&
|
||||
|
||||
|
||||
# [on all the data]
|
||||
steps/train_ubm3a.sh || exit 1;
|
||||
steps/train_sgmm3b.sh || exit 1;
|
||||
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/sgmm3b/tree exp/sgmm3b/final.mdl exp/graph_sgmm3b_tg_pruned || exit 1;
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3b_tgpr_eval92 exp/graph_sgmm3b_tg_pruned/HCLG.fst steps/decode_sgmm3b.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3b_tgpr_eval93 exp/graph_sgmm3b_tg_pruned/HCLG.fst steps/decode_sgmm3b.sh data/eval_nov93.scp
|
||||
scripts/decode.sh exp/decode_sgmm3b_tgpr_utt_eval92 exp/graph_sgmm3b_tg_pruned/HCLG.fst steps/decode_sgmm3b.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_sgmm3b_tgpr_utt_eval93 exp/graph_sgmm3b_tg_pruned/HCLG.fst steps/decode_sgmm3b.sh data/eval_nov93.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3b_fmllr_tgpr_eval92 exp/graph_sgmm3b_tg_pruned/HCLG.fst steps/decode_sgmm3b_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3b_fmllr_tgpr_eval93 exp/graph_sgmm3b_tg_pruned/HCLG.fst steps/decode_sgmm3b_fmllr.sh data/eval_nov93.scp )&
|
||||
|
||||
for year in 92 93; do
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3b_tgpr_eval${year} exp/graph_sgmm3b_tg_pruned/HCLG.fst steps/decode_sgmm3b.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh exp/decode_sgmm3b_tgpr_utt_eval${year} exp/graph_sgmm3b_tg_pruned/HCLG.fst steps/decode_sgmm3b.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3b_fmllr_tgpr_eval${year} exp/graph_sgmm3b_tg_pruned/HCLG.fst steps/decode_sgmm3b_fmllr.sh data/eval_nov${year}.scp
|
||||
done
|
||||
)&
|
||||
|
||||
# [ gender dependent ]
|
||||
steps/train_ubm3b.sh || exit 1;
|
||||
steps/train_sgmm3c.sh || exit 1;
|
||||
|
||||
(scripts/mkgraph.sh data/G_tg_pruned.fst exp/sgmm3c/tree exp/sgmm3c/final.mdl exp/graph_sgmm3c_tg_pruned || exit 1;
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3c_tgpr_eval92 exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3c_tgpr_eval93 exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c.sh data/eval_nov93.scp
|
||||
scripts/decode.sh exp/decode_sgmm3c_tgpr_utt_eval92 exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c.sh data/eval_nov92.scp
|
||||
scripts/decode.sh exp/decode_sgmm3c_tgpr_utt_eval93 exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c.sh data/eval_nov93.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3c_fmllr_tgpr_eval92 exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c_fmllr.sh data/eval_nov92.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3c_fmllr_tgpr_eval93 exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c_fmllr.sh data/eval_nov93.scp )&
|
||||
for year in 92 93; do
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3c_tgpr_eval${year} exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh exp/decode_sgmm3c_tgpr_utt_eval${year} exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c.sh data/eval_nov${year}.scp
|
||||
scripts/decode.sh --per-spk exp/decode_sgmm3c_fmllr_tgpr_eval${year} exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c_fmllr.sh data/eval_nov${year}.scp
|
||||
done
|
||||
)&
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -107,7 +107,7 @@ sgmm-decode-faster "$gselect_opt" --beam=$prebeam --max-active=$max_active \
|
|||
( ali-to-post ark:$dir/$job.pre2_ali ark:- | \
|
||||
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
|
||||
sgmm-post-to-gpost "$gselect_opt" $model "$feats" ark,s,cs:- ark:- | \
|
||||
sgmm-est-fmllr-gpost --spk-vecs=ark:$dir/${job}.vecs2 "$spk2utt_opt" $fmllr_model "$feats" ark,s,cs:- \
|
||||
sgmm-est-fmllr-gpost --spk-vecs=ark:$dir/${job}.vecs2 $spk2utt_opt $fmllr_model "$feats" ark,s,cs:- \
|
||||
ark:$dir/$job.fmllr ) 2>$dir/est_fmllr${job}.log
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:$scp ark:- | transform-feats $utt2spk_opt ark:$dir/$job.fmllr ark:- ark:- |"
|
||||
|
|
|
@ -108,7 +108,7 @@ sgmm-decode-faster "$gselect_opt" --beam=$prebeam --max-active=$max_active \
|
|||
( ali-to-post ark:$dir/$job.pre2_ali ark:- | \
|
||||
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
|
||||
sgmm-post-to-gpost "$gselect_opt" $model "$feats" ark,s,cs:- ark:- | \
|
||||
sgmm-est-fmllr-gpost --spk-vecs=ark:$dir/${job}.vecs2 "$spk2utt_opt" $fmllr_model "$feats" ark,s,cs:- \
|
||||
sgmm-est-fmllr-gpost --spk-vecs=ark:$dir/${job}.vecs2 $spk2utt_opt $fmllr_model "$feats" ark,s,cs:- \
|
||||
ark:$dir/$job.fmllr ) 2>$dir/est_fmllr${job}.log
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:$scp ark:- | transform-feats $utt2spk_opt ark:$dir/$job.fmllr ark:- ark:- |"
|
||||
|
|
|
@ -115,7 +115,7 @@ sgmm-decode-faster "$gselect_opt" --beam=$prebeam --max-active=$max_active \
|
|||
( ali-to-post ark:$dir/$job.pre2_ali ark:- | \
|
||||
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
|
||||
sgmm-post-to-gpost "$gselect_opt" $model "$feats" ark,s,cs:- ark:- | \
|
||||
sgmm-est-fmllr-gpost --spk-vecs=ark:$dir/${job}.vecs2 "$spk2utt_opt" $fmllr_model "$feats" ark,s,cs:- \
|
||||
sgmm-est-fmllr-gpost --spk-vecs=ark:$dir/${job}.vecs2 $spk2utt_opt $fmllr_model "$feats" ark,s,cs:- \
|
||||
ark:$dir/$job.fmllr ) 2>$dir/est_fmllr${job}.log
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:$scp ark:- | transform-feats $utt2spk_opt ark:$dir/$job.fmllr ark:- ark:- |"
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This script does the decoding of a single batch of test data (on one core).
|
||||
# It requires arguments. It takes the graphdir and decoding directory, and the
|
||||
# job number. It expects a file $decode_dir/${job_number}.scp to exist, and
|
||||
# puts its output in $decode_dir/${job_number}.tra
|
||||
#
|
||||
# If the files
|
||||
# $decode_dir/${job_number}.utt2spk and $decode_dir/${job_number}.spk2utt exist,
|
||||
# this script will assume you want to do per-speaker (not per-utterance) adaptation.
|
||||
|
||||
# Require exactly three arguments: <graph> <decode-dir> <job-number>.
|
||||
# The usage line prints $0 so it always names the script actually invoked.
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: $0 <graph> <decode-dir> <job-number>"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
. path.sh || exit 1;
|
||||
|
||||
acwt=0.0625
|
||||
beam=13.0
|
||||
prebeam=12.0 # first-pass decoding beam...
|
||||
max_active=7000
|
||||
alimodel=exp/tri2m/final.alimdl # first-pass model...
|
||||
model=exp/tri2m/final.mdl
|
||||
lvtln=exp/tri2m/0.lvtln
|
||||
mat=exp/tri2f/final.mat
|
||||
#####################
|
||||
silphones=`cat data/silphones.csl`
|
||||
graph=$1
|
||||
dir=$2
|
||||
job=$3
|
||||
scp=$dir/$job.scp
|
||||
sifeats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
if [ -f $dir/$job.spk2utt ]; then
|
||||
if [ ! -f $dir/$job.utt2spk ]; then
|
||||
echo "spk2utt but not utt2spk file present!"
|
||||
exit 1
|
||||
fi
|
||||
spk2utt_opt=--spk2utt=ark:$dir/$job.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:$dir/$job.utt2spk
|
||||
fi
|
||||
|
||||
# Check up front that every input file this script reads exists. $lvtln and
|
||||
# $mat are passed to gmm-est-lvtln-trans and transform-feats in this script;
|
||||
# the list previously named $et, which is never set here.
|
||||
filenames="$scp $model $alimodel $lvtln $mat $graph data/words.txt"
|
||||
for file in $filenames; do
|
||||
if [ ! -f $file ] ; then
|
||||
echo "No such file $file";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
echo running on `hostname` > $dir/predecode${job}.log
|
||||
|
||||
# First-pass decoding
|
||||
|
||||
gmm-decode-faster --beam=$prebeam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $alimodel $graph "$sifeats" ark,t:$dir/$job.pre_tra ark,t:$dir/$job.pre_ali 2>>$dir/predecode${job}.log
|
||||
|
||||
# Estimate transforms
|
||||
ali-to-post ark:$dir/$job.pre_ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alimodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $alimodel "$sifeats" ark,o:- ark:- | \
|
||||
gmm-est-lvtln-trans $spk2utt_opt $model $lvtln "$sifeats" ark,o:- \
|
||||
ark:$dir/$job.trans ark,t:$dir/$job.warp 2>$dir/lvtln${job}.log
|
||||
|
||||
feats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/$job.trans ark:- ark:- |"
|
||||
|
||||
# Final decoding
|
||||
echo running on `hostname` > $dir/decode$job.log
|
||||
gmm-decode-faster --beam=$beam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $model $graph "$feats" ark,t:$dir/$job.tra ark,t:$dir/$job.ali 2>>$dir/decode$job.log
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This script does the decoding of a single batch of test data (on one core).
|
||||
# It requires arguments. It takes the graphdir and decoding directory, and the
|
||||
# job number. It expects a file $decode_dir/${job_number}.scp to exist, and
|
||||
# puts its output in $decode_dir/${job_number}.tra
|
||||
#
|
||||
# If the files
|
||||
# $decode_dir/${job_number}.utt2spk and $decode_dir/${job_number}.spk2utt exist,
|
||||
# this script will assume you want to do per-speaker (not per-utterance) adaptation.
|
||||
|
||||
# Require exactly three arguments: <graph> <decode-dir> <job-number>.
|
||||
# The usage line prints $0 so it always names the script actually invoked.
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: $0 <graph> <decode-dir> <job-number>"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
. path.sh || exit 1;
|
||||
|
||||
acwt=0.0625
|
||||
beam=13.0
|
||||
prebeam=12.0 # first-pass decoding beam...
|
||||
max_active=7000
|
||||
alimodel=exp/tri2m/final.alimdl # first-pass model...
|
||||
model=exp/tri2m/final.mdl
|
||||
lvtln=exp/tri2m/0.lvtln
|
||||
mat=exp/tri2f/final.mat
|
||||
#####################
|
||||
silphones=`cat data/silphones.csl`
|
||||
graph=$1
|
||||
dir=$2
|
||||
job=$3
|
||||
scp=$dir/$job.scp
|
||||
sifeats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
if [ -f $dir/$job.spk2utt ]; then
|
||||
if [ ! -f $dir/$job.utt2spk ]; then
|
||||
echo "spk2utt but not utt2spk file present!"
|
||||
exit 1
|
||||
fi
|
||||
spk2utt_opt=--spk2utt=ark:$dir/$job.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:$dir/$job.utt2spk
|
||||
fi
|
||||
|
||||
# Check up front that every input file this script reads exists. $lvtln and
|
||||
# $mat are passed to gmm-est-lvtln-trans and transform-feats in this script;
|
||||
# the list previously named $et, which is never set here.
|
||||
filenames="$scp $model $alimodel $lvtln $mat $graph data/words.txt"
|
||||
for file in $filenames; do
|
||||
if [ ! -f $file ] ; then
|
||||
echo "No such file $file";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
echo running on `hostname` > $dir/predecode${job}.log
|
||||
|
||||
# First-pass decoding
|
||||
|
||||
gmm-decode-faster --beam=$prebeam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $alimodel $graph "$sifeats" ark,t:$dir/$job.pre_tra ark,t:$dir/$job.pre_ali 2>>$dir/predecode${job}.log
|
||||
|
||||
# Estimate transforms
|
||||
ali-to-post ark:$dir/$job.pre_ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alimodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $alimodel "$sifeats" ark,o:- ark:- | \
|
||||
gmm-est-lvtln-trans --norm-type=diag $spk2utt_opt $model $lvtln "$sifeats" ark,o:- \
|
||||
ark:$dir/$job.trans ark,t:$dir/$job.warp 2>$dir/lvtln${job}.log
|
||||
|
||||
feats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/$job.trans ark:- ark:- |"
|
||||
|
||||
# Final decoding
|
||||
echo running on `hostname` > $dir/decode$job.log
|
||||
gmm-decode-faster --beam=$beam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $model $graph "$feats" ark,t:$dir/$job.tra ark,t:$dir/$job.ali 2>>$dir/decode$job.log
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# tri2m_vtln is doing normal, feature-level VTLN (with diagonal
|
||||
# fMLLR).
|
||||
|
||||
# This script does the decoding of a single batch of test data (on one core).
|
||||
# It requires arguments. It takes the graphdir and decoding directory, and the
|
||||
# job number. It expects a file $decode_dir/${job_number}.scp to exist, and
|
||||
# puts its output in $decode_dir/${job_number}.tra
|
||||
#
|
||||
# If the files
|
||||
# $decode_dir/${job_number}.utt2spk and $decode_dir/${job_number}.spk2utt exist,
|
||||
# this script will assume you want to do per-speaker (not per-utterance) adaptation.
|
||||
|
||||
|
||||
# Require exactly three arguments: <graph> <decode-dir> <job-number>.
|
||||
# The usage line prints $0 so it always names the script actually invoked.
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: $0 <graph> <decode-dir> <job-number>"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
. path.sh || exit 1;
|
||||
|
||||
acwt=0.0625
|
||||
beam=13.0
|
||||
mincount=100 # for fMLLR
|
||||
prebeam=12.0 # first-pass decoding beam...
|
||||
max_active=7000
|
||||
alimodel=exp/tri2m/final.alimdl # first-pass model...
|
||||
model=exp/tri2m/final.mdl # Model to decide which transform to use.
|
||||
vtlnmodel=exp/tri2m/final.vtlnmdl
|
||||
lvtln=exp/tri2m/0.lvtln
|
||||
mat=exp/tri2f/final.mat
|
||||
silphones=`cat data/silphones.csl`
|
||||
graph=$1
|
||||
dir=$2
|
||||
job=$3
|
||||
scp=$dir/$job.scp
|
||||
sifeats="ark:splice-feats --print-args=false scp:$scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
if [ -f $dir/$job.spk2utt ]; then
|
||||
if [ ! -f $dir/$job.utt2spk ]; then
|
||||
echo "spk2utt but not utt2spk file present!"
|
||||
exit 1
|
||||
fi
|
||||
spk2utt_opt=--spk2utt=ark:$dir/$job.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:$dir/$job.utt2spk
|
||||
fi
|
||||
|
||||
# if we can't find $dir/${job}_wav.scp, then user didn't give --wav option
|
||||
# to scripts/decode.sh
|
||||
# Check up front that every input file this script reads exists, including
|
||||
# the per-job wav list. $lvtln and $mat are passed to gmm-est-lvtln-trans and
|
||||
# transform-feats in this script; the list previously named $et, which is
|
||||
# never set here.
|
||||
filenames="$scp $model $alimodel $lvtln $mat $graph data/words.txt $dir/${job}_wav.scp"
|
||||
for file in $filenames; do
|
||||
if [ ! -f $file ] ; then
|
||||
echo "No such file $file";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
echo running on `hostname` > $dir/predecode${job}.log
|
||||
|
||||
# First-pass decoding
|
||||
|
||||
gmm-decode-faster --beam=$prebeam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $alimodel $graph "$sifeats" ark,t:$dir/$job.pre_tra ark,t:$dir/$job.pre_ali 2>>$dir/predecode${job}.log
|
||||
|
||||
# Estimate transforms
|
||||
ali-to-post ark:$dir/$job.pre_ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alimodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $alimodel "$sifeats" ark,o:- ark:- | \
|
||||
gmm-est-lvtln-trans $spk2utt_opt $model $lvtln "$sifeats" ark,o:- \
|
||||
ark:$dir/$job.trans ark,t:$dir/$job.warp 2>$dir/lvtln${job}.log
|
||||
|
||||
# Compute warping factor
|
||||
cat $dir/$job.warp | awk '{print $1, (0.85+0.01*$2);}' > $dir/$job.factor
|
||||
|
||||
feats="ark:compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/$job.factor --config=conf/mfcc.conf scp:$dir/${job}_wav.scp ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
# Estimate diagonal fMLLR transforms on the VTLN-warped features in "$feats",
# weighting by the first-pass alignments with silence posteriors zeroed.
# The result overwrites $dir/$job.trans written by gmm-est-lvtln-trans.
# NOTE(review): $vtlnmodel (exp/tri2m/final.vtlnmdl) is set near the top of
# this script but never used; both this step and the final decoding pass
# $model to the tools. Confirm whether $vtlnmodel was intended here.
( ali-to-post ark:$dir/$job.pre_ali ark:- | \
|
||||
weight-silence-post 0.0 $silphones $alimodel ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-min-count=$mincount --fmllr-update-type=diag $spk2utt_opt $model "$feats" ark,o:- ark:$dir/$job.trans ) 2>$dir/fmllr${job}.log || exit 1;
|
||||
|
||||
feats="ark:compute-mfcc-feats $utt2spk_opt --vtln-low=100 --vtln-high=-600 --vtln-map=ark:$dir/$job.factor --config=conf/mfcc.conf scp:$dir/${job}_wav.scp ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- | transform-feats $utt2spk_opt ark:$dir/$job.trans ark:- ark:- |"
|
||||
|
||||
# Final decoding
|
||||
echo running on `hostname` > $dir/decode$job.log
|
||||
gmm-decode-faster --beam=$beam --max-active=$max_active --acoustic-scale=$acwt --word-symbol-table=data/words.txt $model $graph "$feats" ark,t:$dir/$job.tra ark,t:$dir/$job.ali 2>>$dir/decode$job.log
|
||||
|
|
@ -41,3 +41,4 @@ wait;
|
|||
|
||||
cat $root_out/train_raw_mfcc{1,2,3,4}.scp > data/train.scp
|
||||
|
||||
echo Succeeded "(probably)"
|
||||
|
|
|
@ -54,8 +54,8 @@ compute-cmvn-stats $spk2utt_opt "$srcfeats" ark:$dir/cmvn.ark 2>$dir/cmvn.log
|
|||
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- | apply-cmvn $utt2spk_opt ark:$dir/cmvn.ark ark:- ark:- |"
|
||||
|
||||
for n in 1 2 3; do
|
||||
srcfeatspart[$n]="ark:add-deltas --print-args=false scp:$dir/train${n}.scp ark:- |"
|
||||
featspart[$n]="ark:add-deltas --print-args=false scp:$dir/train${n}.scp ark:- | apply-cmvn $utt2spk_opt ark:$dir/cmvn.ark ark:- ark:- |"
|
||||
srcfeatspart[$n]="ark,s,cs:add-deltas --print-args=false scp:$dir/train${n}.scp ark:- |"
|
||||
featspart[$n]="ark,s,cs:add-deltas --print-args=false scp:$dir/train${n}.scp ark:- | apply-cmvn $utt2spk_opt ark:$dir/cmvn.ark ark:- ark:- |"
|
||||
done
|
||||
|
||||
cp $srcdir/topo $dir
|
||||
|
|
|
@ -283,7 +283,7 @@ fi
|
|||
|
||||
|
||||
# The following files may be useful for display purposes.
|
||||
for y in lvtln_iters; do
|
||||
for y in $lvtln_iters; do
|
||||
cat $dir/$y.?.warp | scripts/process_warps.pl data/spk2gender.map > $dir/warps.$y
|
||||
y=$[$y+1]
|
||||
done
|
||||
|
|
|
@ -0,0 +1,292 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# tri2m is as tri2g ("linear VTLN", and training normal VTLN at the end),
|
||||
# except basing it on LDA+MLLT features, not deltas.
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
dir=exp/tri2m
|
||||
srcdir=exp/tri2f
|
||||
srcmodel=$srcdir/final.mdl
|
||||
mat=$srcdir/final.mat
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
|
||||
# This block of parameters relates to LVTLN.
|
||||
compute_vtlnmdl=true # If true, at the end compute a model with actual feature-space
|
||||
# VTLN features. You can decode with this as an alternative to
|
||||
# final.mdl which takes the LVTLN features.
|
||||
dim=40 # the dim of our features.
|
||||
lvtln_iters="2 4 6 8 12"; # Recompute LVTLN transforms on these iters.
|
||||
numfiles=40 # Number of feature files for computing LVTLN transforms.
|
||||
numclass=31; # Can't really change this without changing the script below
|
||||
defaultclass=15; # Corresponds to no warping.
|
||||
|
||||
|
||||
numiters=35
|
||||
maxiterinc=20 # By this iter, we have all the Gaussians.
|
||||
realign_iters="10 20 30";
|
||||
numleaves=2000
|
||||
numgauss=2000 # initial num-gauss smallish so that transform-training
|
||||
# code (when we modify this script) is a bit faster.
|
||||
totgauss=10000 # Total num-gauss
|
||||
incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
|
||||
|
||||
silphonelist=`cat data/silphones.csl`
|
||||
|
||||
mkdir -p $dir
|
||||
cp $srcdir/train.scp $dir
|
||||
cp $srcdir/train.tra $dir
|
||||
|
||||
scripts/filter_scp.pl $dir/train.scp data/train_wav.scp > $dir/train_wav.scp
|
||||
scripts/filter_scp.pl $dir/train.scp data/train.utt2spk > $dir/train.utt2spk
|
||||
|
||||
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.scp
|
||||
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train_wav{,1,2,3}.scp
|
||||
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.tra
|
||||
scripts/split_scp.pl --utt2spk=$dir/train.utt2spk $dir/train{,1,2,3}.utt2spk
|
||||
|
||||
|
||||
for n in 1 2 3 ""; do # The "" handles the un-split one. Creating spk2utt files..
|
||||
scripts/utt2spk_to_spk2utt.pl $dir/train$n.utt2spk > $dir/train$n.spk2utt
|
||||
done
|
||||
|
||||
# also see featspart below, used for sub-parts of the features;
|
||||
# try to keep them in sync.
|
||||
feats="ark,s,cs:splice-feats --print-args=false scp:$dir/train.scp ark:- | transform-feats $mat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train.utt2spk \"ark:cat $dir/cur?.trans|\" ark:- ark:- |"
|
||||
srcfeats="ark,s,cs:splice-feats --print-args=false scp:$dir/train.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
for n in 1 2 3; do
|
||||
featspart[$n]="ark,s,cs:splice-feats --print-args=false scp:$dir/train${n}.scp ark:- | transform-feats $mat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train.utt2spk ark:$dir/cur$n.trans ark:- ark:- |"
|
||||
srcfeatspart[$n]="ark,s,cs:splice-feats --print-args=false scp:$dir/train${n}.scp ark:- | transform-feats $mat ark:- ark:- |"
|
||||
done
|
||||
|
||||
cp $srcdir/topo $dir
|
||||
|
||||
|
||||
|
||||
gmm-init-lvtln --dim=$dim --num-classes=$numclass --default-class=$defaultclass \
|
||||
$dir/0.lvtln 2>$dir/init_lvtln.log || exit 1
|
||||
|
||||
# Small subset of features for initializing the LVTLN.
|
||||
|
||||
featsub="ark:scripts/subset_scp.pl $numfiles $dir/train.scp | splice-feats scp:- ark:- | transform-feats $mat ark:- ark:- |"
|
||||
|
||||
echo "Initializing lvtln transforms."
|
||||
c=0
|
||||
while [ $c -lt $numclass ]; do
|
||||
warp=`perl -e 'print 0.85 + 0.01*$ARGV[0];' $c`
|
||||
featsub_warp="ark:scripts/subset_scp.pl $numfiles $dir/train_wav.scp | compute-mfcc-feats --vtln-low=100 --vtln-high=-600 --vtln-warp=$warp --config=conf/mfcc.conf scp:- ark:- | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- |"
|
||||
gmm-train-lvtln-special --normalize-var=true $c $dir/0.lvtln $dir/0.lvtln \
|
||||
"$featsub" "$featsub_warp" 2> $dir/train_special.$c.log || exit 1;
|
||||
c=$[$c+1]
|
||||
done
|
||||
|
||||
|
||||
# Align all training data using old model (and old graphs, since we
|
||||
# use the same data-subset as last time).
|
||||
# Note: a few fail to get aligned here due to the difference between
|
||||
# per-speaker and per-utterance splitting, but this doesn't really matter.
|
||||
|
||||
echo "Aligning all training data"
|
||||
|
||||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcmodel \
|
||||
"ark:gunzip -c $srcdir/graphs${n}.fsts.gz|" "${srcfeatspart[$n]}" \
|
||||
"ark:|gzip -c >$dir/0.${n}.ali.gz" \
|
||||
2> $dir/align.0.${n}.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo alignment error RE old system && exit 1
|
||||
|
||||
echo "Computing LVTLN transforms (iter 0)"
|
||||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $dir/0.$n.ali.gz|" ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $srcmodel ark:- ark:- | \
|
||||
gmm-post-to-gpost $srcmodel "${srcfeatspart[$n]}" ark:- ark:- | \
|
||||
gmm-est-lvtln-trans --verbose=1 --spk2utt=ark:$dir/train$n.spk2utt $srcmodel $dir/0.lvtln \
|
||||
"${srcfeatspart[$n]}" ark:- ark:$dir/cur$n.trans ark,t:$dir/0.$n.warp ) \
|
||||
2>$dir/lvtln.0.$n.log || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo error computing LVTLN transforms on iter 0 && exit 1
|
||||
|
||||
|
||||
acc-tree-stats --ci-phones=$silphonelist $srcmodel "$feats" "ark:gunzip -c $dir/0.?.ali.gz|" $dir/treeacc 2> $dir/acc.tree.log || exit 1;
|
||||
|
||||
|
||||
# The next few commands are involved with making the questions
|
||||
# for tree clustering. The extra complexity vs. the RM recipe has
|
||||
# to do with the desire to ask questions about the "real" phones
|
||||
# ignoring things like stress and position-in-word, and ask questions
|
||||
# separately about stress and position-in-word.
|
||||
|
||||
# Don't include silences as things to be clustered -> --nosil option.
|
||||
scripts/make_shared_phones.sh --nosil | scripts/sym2int.pl data/phones.txt > $dir/phone_sets.list
|
||||
cluster-phones $dir/treeacc $dir/phone_sets.list $dir/questions.txt 2> $dir/cluster_phones.log || exit 1;
|
||||
scripts/int2sym.pl data/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
|
||||
scripts/make_extra_questions.sh | cat $dir/questions_syms.txt - > $dir/questions_syms_all.txt
|
||||
scripts/sym2int.pl data/phones.txt < $dir/questions_syms_all.txt > $dir/questions_all.txt
|
||||
|
||||
compile-questions $dir/topo $dir/questions_all.txt $dir/questions.qst 2>$dir/compile_questions.log || exit 1;
|
||||
|
||||
scripts/make_roots.sh > $dir/roots_syms.txt
|
||||
scripts/sym2int.pl --ignore-oov data/phones.txt < $dir/roots_syms.txt > $dir/roots.txt
|
||||
|
||||
|
||||
build-tree --verbose=1 --max-leaves=$numleaves \
|
||||
$dir/treeacc $dir/roots.txt \
|
||||
$dir/questions.qst $dir/topo $dir/tree 2> $dir/train_tree.log || exit 1;
|
||||
|
||||
gmm-init-model --write-occs=$dir/1.occs \
|
||||
$dir/tree $dir/treeacc $dir/topo $dir/1.mdl 2> $dir/init_model.log || exit 1;
|
||||
|
||||
gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl \
|
||||
2>$dir/mixup.log || exit 1;
|
||||
|
||||
|
||||
rm $dir/treeacc $dir/1.occs
|
||||
|
||||
|
||||
# Convert alignments generated from previous model, to use as initial alignments.
|
||||
|
||||
for n in 1 2 3; do
|
||||
convert-ali $srcmodel $dir/1.mdl $dir/tree \
|
||||
"ark:gunzip -c $dir/0.$n.ali.gz|" \
|
||||
"ark:|gzip -c > $dir/cur$n.ali.gz" 2>$dir/convert.$n.log || exit 1;
|
||||
done
|
||||
rm $dir/0.?.ali.gz
|
||||
|
||||
# Make training graphs
|
||||
echo "Compiling training graphs"
|
||||
|
||||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
compile-train-graphs $dir/tree $dir/1.mdl data/L.fst ark:$dir/train${n}.tra \
|
||||
"ark:|gzip -c > $dir/graphs${n}.fsts.gz" \
|
||||
2>$dir/compile_graphs.${n}.log || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo compile-graphs error && exit 1
|
||||
|
||||
|
||||
x=1
|
||||
while [ $x -lt $numiters ]; do
|
||||
echo "Pass $x"
|
||||
if echo $realign_iters | grep -w $x >/dev/null; then
|
||||
echo "Aligning data"
|
||||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/$x.mdl \
|
||||
"ark:gunzip -c $dir/graphs${n}.fsts.gz|" "${featspart[$n]}" \
|
||||
"ark:|gzip -c >$dir/cur${n}.ali.gz" 2> $dir/align.$x.$n.log \
|
||||
|| touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo error aligning data && exit 1
|
||||
fi
|
||||
if echo $lvtln_iters | grep -w $x >/dev/null; then
|
||||
# Work out current transforms (in parallel).
|
||||
echo "Computing LVTLN transforms"
|
||||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
( ali-to-post "ark:gunzip -c $dir/cur${n}.ali.gz|" ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- | \
|
||||
gmm-post-to-gpost $dir/$x.mdl "${featspart[$n]}" ark,o:- ark:- | \
|
||||
gmm-est-lvtln-trans --spk2utt=ark:$dir/train$n.spk2utt --verbose=1 $dir/$x.mdl $dir/0.lvtln \
|
||||
"${srcfeatspart[$n]}" ark,s,cs:- ark:$dir/tmp$n.trans ark,t:$dir/$x.$n.warp ) \
|
||||
2> $dir/trans.$x.$n.log && mv $dir/tmp$n.trans $dir/cur$n.trans \
|
||||
|| touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
# Abort if any of the background LVTLN transform jobs above touched $dir/.error.
[ -f $dir/.error ] && echo error computing LVTLN transforms on iter $x && exit 1
|
||||
fi
|
||||
gmm-acc-stats-ali --binary=false $dir/$x.mdl "$feats" \
|
||||
"ark,s,cs:gunzip -c $dir/cur?.ali.gz|" $dir/$x.acc 2> $dir/acc.$x.log || exit 1;
|
||||
gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl $dir/$x.acc $dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1;
|
||||
rm $dir/$x.mdl $dir/$x.acc $dir/$x.occs 2>/dev/null
|
||||
|
||||
if [ $x -le $maxiterinc ]; then
|
||||
numgauss=$[$numgauss+$incgauss];
|
||||
fi
|
||||
x=$[$x+1];
|
||||
done
|
||||
|
||||
# Accumulate stats for "alignment model" which is as the model but with
|
||||
# the baseline features (shares Gaussian-level alignments).
|
||||
( ali-to-post "ark:gunzip -c $dir/cur?.ali.gz|" ark:- | \
|
||||
gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$srcfeats" ark:- $dir/$x.acc2 ) 2>$dir/acc_alimdl.log || exit 1;
|
||||
# Update model.
|
||||
gmm-est --remove-low-count-gaussians=false $dir/$x.mdl $dir/$x.acc2 $dir/$x.alimdl \
|
||||
2>$dir/est_alimdl.log || exit 1;
|
||||
rm $dir/$x.acc2
|
||||
|
||||
|
||||
if [ $compute_vtlnmdl == "true" ]; then
|
||||
iter=`echo 0 $lvtln_iters | awk '{print $NF}'` # last iter we re-estimated LVTLN
|
||||
rm -f $dir/.error
|
||||
for n in 1 2 3; do
|
||||
cat $dir/$iter.$n.warp | awk '{print $1, (0.85+0.01*$2);}' > $dir/cur$n.factor
|
||||
compute-mfcc-feats --utt2spk=ark:$dir/train$n.utt2spk --vtln-low=100 --vtln-high=-600 \
|
||||
--vtln-map=ark:$dir/cur$n.factor --config=conf/mfcc.conf \
|
||||
scp:$dir/train_wav$n.scp ark:$dir/tmp$n.ark 2>$dir/mfcc.$n.log \
|
||||
|| touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo error computing VTLN-warped MFCC features && exit 1
|
||||
|
||||
# Compute diagonal fMLLR transform to normalize VTLN feats.
|
||||
# (note, this is a bit stronger than the mean-only transform we used for the LVTLN stuff,
|
||||
# LVTLN also globally normalized the variance of each warp factor, so this seems
|
||||
# appropriate).
|
||||
for n in 1 2 3; do
|
||||
vtlnfeats="ark:splice-feats ark:$dir/tmp$n.ark ark:- | transform-feats $mat ark:- ark:- |"
|
||||
( ali-to-post "ark:gunzip -c $dir/cur$n.ali.gz|" ark:- | \
|
||||
weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-update-type=diag --spk2utt=ark:$dir/train$n.spk2utt \
|
||||
$dir/$x.mdl "$vtlnfeats" ark,o:- ark:$dir/vtln$n.trans ) \
|
||||
2>$dir/vtln_fmllr.$n.log || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo error computing fMLLR transforms after VTLN && exit 1
|
||||
|
||||
# all the features, with diagonal fMLLR
|
||||
vtlnfeats="ark:cat $dir/tmp?.ark | splice-feats ark:- ark:- | transform-feats $mat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train.utt2spk \"ark:cat $dir/vtln?.trans|\" ark:- ark:- |"
|
||||
|
||||
( ali-to-post "ark:gunzip -c $dir/cur?.ali.gz|" ark:- | \
|
||||
gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$vtlnfeats" ark,s,cs:- $dir/$x.acc3 ) 2>$dir/acc_vtlnmdl.log || exit 1;
|
||||
# Update model.
|
||||
gmm-est $dir/$x.mdl $dir/$x.acc3 $dir/$x.vtlnmdl \
|
||||
2>$dir/est_vtlnmdl.log || exit 1;
|
||||
rm $dir/$x.acc3
|
||||
# Clear any stale final.vtlnmdl link so that re-creating it with ln -s succeeds on a re-run.
rm $dir/final.vtlnmdl 2>/dev/null
|
||||
ln -s $x.vtlnmdl $dir/final.vtlnmdl
|
||||
rm $dir/tmp?.ark
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# The following files may be useful for display purposes.
|
||||
for y in $lvtln_iters; do
|
||||
cat $dir/$y.?.warp | scripts/process_warps.pl data/spk2gender.map > $dir/warps.$y
|
||||
y=$[$y+1]
|
||||
done
|
||||
|
||||
|
||||
( cd $dir; rm final.{mdl,alimdl,et} 2>/dev/null;
|
||||
ln -s $x.mdl final.mdl; ln -s $x.alimdl final.alimdl;
|
||||
# No final.et link: this script never writes a .et file, and the old link target
# was built from the unset variable $numiters_et (yielding a dangling ".et" link).
)
|
|
@ -43,7 +43,7 @@ BaseFloat DecodableAmDiagGmmUnmapped::LogLikelihoodZeroBased(
|
|||
// check if everything is in order
|
||||
if (pdf.Dim() != data.Dim()) {
|
||||
KALDI_ERR << "Dim mismatch: data dim = " << data.Dim()
|
||||
<< "vs. model dim = " << pdf.Dim();
|
||||
<< " vs. model dim = " << pdf.Dim();
|
||||
}
|
||||
if (!pdf.valid_gconsts()) {
|
||||
KALDI_ERR << "State " << (state) << ": Must call ComputeGconsts() "
|
||||
|
@ -96,7 +96,7 @@ BaseFloat DecodableAmDiagGmmRegtreeFmllr::LogLikelihoodZeroBased(int32 frame,
|
|||
// check if everything is in order
|
||||
if (pdf.Dim() != data.Dim()) {
|
||||
KALDI_ERR << "Dim mismatch: data dim = " << data.Dim()
|
||||
<< "vs. model dim = " << pdf.Dim();
|
||||
<< " vs. model dim = " << pdf.Dim();
|
||||
}
|
||||
if (!pdf.valid_gconsts()) {
|
||||
KALDI_ERR << "State " << (state) << ": Must call ComputeGconsts() "
|
||||
|
@ -252,7 +252,7 @@ BaseFloat DecodableAmDiagGmmRegtreeMllr::LogLikelihoodZeroBased(int32 frame,
|
|||
// check if everything is in order
|
||||
if (pdf.Dim() != data.Dim()) {
|
||||
KALDI_ERR << "Dim mismatch: data dim = " << data.Dim()
|
||||
<< "vs. model dim = " << pdf.Dim();
|
||||
<< " vs. model dim = " << pdf.Dim();
|
||||
}
|
||||
|
||||
if (frame != previous_frame_) { // cache the squared stats.
|
||||
|
|
Загрузка…
Ссылка в новой задаче