Fixes to HTK model conversion (and minor script changes).

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@58 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2011-05-30 12:04:57 +00:00
Родитель b25ec1fdc5
Коммит 81c7a674f0
7 изменённых файлов: 86 добавлений и 21 удалений

Просмотреть файл

@ -31,11 +31,17 @@ exp/decode_tri2h/wer:Average WER is 4.252773 (533 / 12533) # Splice-9-frames + H
exp/decode_tri2i/wer:Average WER is 4.077236 (511 / 12533) # Triple-deltas + HLDA
exp/decode_tri2j/wer:Average WER is 3.694247 (463 / 12533) # Triple-deltas + LDA + MLLT
exp/decode_tri2k/wer:Average WER is 2.768691 (347 / 12533) # LDA + exponential transform
exp/decode_tri2k_utt/wer:Average WER is 3.024017 (379 / 12533) # per-utterance adaptation.
exp/decode_tri2k_fmllr/wer:Average WER is 2.481449 (311 / 12533) # + fMLLR
exp/decode_tri2l/wer:Average WER is 2.688901 (337 / 12533) # Splice-9-frames + LDA + MLLT + SAT (fMLLR in test)
exp/decode_tri2l_utt/wer:Average WER is 5.066624 (635 / 12533) # [ as decode_tri2l but per-utt in test. ]
exp/decode_sgmma/wer:Average WER is 3.151680 (395 / 12533)
exp/decode_sgmma_fmllr/wer:Average WER is 2.768691 (347 / 12533)
exp/decode_sgmmb/wer:Average WER is 2.680922 (336 / 12533)
exp/decode_sgmmb_fmllr/wer:Average WER is 2.537302 (318 / 12533)
exp/decode_tri2a/wer:Average WER is 4.476183 (561 / 12533)

Просмотреть файл

@ -0,0 +1,69 @@
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# SGMM decoding with adaptation.
#
# SGMM decoding; use a different acoustic scale from normal (0.1 vs 0.08333)
# (1) decode with "alignment model"
# (2) get GMM posteriors with "alignment model" and estimate speaker
# vectors with final model
# (3) decode with final model.
# Pick up the tool paths if a path.sh is present in the working directory.
if [ -f path.sh ]; then . path.sh; fi

# Output directory and the model/graph inputs used below.
dir=exp/decode_sgmmb_utt
tree=exp/sgmmb/tree
model=exp/sgmmb/final.mdl
alimodel=exp/sgmmb/final.alimdl
graphdir=exp/graph_sgmmb
silphonelist=`cat data/silphones.csl`

mkdir -p $dir

# Build the decoding graph ($graphdir/HCLG.fst is read by both decoding passes).
scripts/mkgraph.sh $tree $model $graphdir

# Each test set is processed in its own background subshell; "wait" below
# blocks until all of them have finished.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
  (
    feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"

    # The speaker maps are deliberately left commented out, so $spk2utt_opt and
    # $utt2spk_opt expand to nothing in the commands below.
    #spk2utt_opt="--spk2utt=ark:data/test_${test}.spk2utt"
    #utt2spk_opt="--utt2spk=ark:data/test_${test}.utt2spk"

    # Gaussian selection, stored compressed.  The log is written per test set:
    # the subshells run concurrently, so a single shared log file would be
    # overwritten by whichever job opened it last.
    sgmm-gselect $model "$feats" ark,t:- 2>$dir/gselect_${test}.log | \
      gzip -c > $dir/${test}_gselect.gz || exit 1;
    gselect_opt="--gselect=ark:gunzip -c $dir/${test}_gselect.gz|"

    # (1) First pass with the alignment model.  Use smaller beam first time.
    sgmm-decode-faster "$gselect_opt" --beam=15.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $alimodel $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.pre_tra ark,t:$dir/test_${test}.pre_ali 2> $dir/predecode_${test}.log

    # (2) Turn the first-pass alignments into posteriors, down-weight the
    # silence phones (weight 0.01), convert to Gaussian-level posteriors with
    # the alignment model, and estimate the speaker vectors with the final model.
    ( ali-to-post ark:$dir/test_${test}.pre_ali ark:- | \
      weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
      sgmm-post-to-gpost "$gselect_opt" $alimodel "$feats" ark,s,cs:- ark:- | \
      sgmm-est-spkvecs-gpost $spk2utt_opt $model "$feats" ark,s,cs:- \
        ark:$dir/test_${test}.vecs ) 2>$dir/vecs_${test}.log

    # (3) Second pass with the final model and the estimated vectors.
    sgmm-decode-faster $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log

    # Score against the reference transcripts.
    # the ,p option lets it score partial output without dying..
    scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
      compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
  ) &
done
wait

# Pool the per-test-set results: sum field 4 (numerator) and field 6
# (denominator) of every WER line and report the overall percentage.
grep WER $dir/wer_* | \
  awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
  > $dir/wer

Просмотреть файл

@ -1,6 +1,6 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Copyright 2010-2011 Microsoft Corporation Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

2
misc/htk_conversion/convert_htk.sh Normal file → Executable file
Просмотреть файл

@ -109,7 +109,7 @@ dim=`grep -w MEAN $mmf | head -1 | awk '{print $2}'` # probably 39.
convert_states.pl $dim < states2.txt > kaldi.am_gmm
gmm-init-trans kaldi.topo kaldi.am_gmm kaldi.tree kaldi.mdl
gmm-init-trans kaldi.topo kaldi.am_gmm kaldi.tree kaldi.mdl || exit 1;
# clean up:

Просмотреть файл

@ -19,18 +19,18 @@ while(<>) {
if($A[1] != 1.0) {
print STDERR "Warning: phone $phone seems not to be normal topology: result may not be correct.\n";
}
} elsif($n < $numstates) { # last line is all zeros, ignore it.
} else {
$nm2 = $n-2; # Kaldi-numbered state, 2 less than HTK one.
print " <State> $nm2 <PdfClass> $nm2\n";
if($n < $numstates) {
print " <State> $nm2 <PdfClass> $nm2\n";
} else {
print " <State> $nm2\n";
}
# The next few lines are just a sanity check-- that we have the "normal" topology.
for($p = 0; $p < $numstates; $p++) {
if($A[$p] != 0) {
$deststate = $p-1; # in kaldi numbering.
if($deststate == $numstates-2) { # final-state, in kaldi format.
print " <Final> $A[$p] ";
} else {
print " <Transition> $deststate $A[$p]\n";
}
print " <Transition> $deststate $A[$p]\n";
}
}
print " </State>\n";

Просмотреть файл

@ -29,17 +29,6 @@ $N = 3;
$P = 1;
print "ContextDependency $N $P\n";
$np = $maxphone+1;
print "ToLength TE 1 $np\n" ; # printing out to-length map.. 1==split-on-central-position;
# $np is size of array in table-event-map.
print " ( ";
for($p = 0; $p < $np; $p++) {
if(defined $len{$p}) {
print "CE $len{$p} ";
} else {
print "NULL ";
}
}
print ")\n";
# printing out to-pdf map.. 1==split-on-central-position;
# $np is size of array in table-event-map.
print "ToPdf TE 1 $np (\n";

Просмотреть файл

@ -161,7 +161,8 @@ void ContextDependency::Read (std::istream &is, bool binary) {
EventMap *to_num_pdf_classes = EventMap::Read(is, binary);
if (to_num_pdf_classes) delete to_num_pdf_classes;
ReadMarker(is, binary, &marker);
} else if (marker == "ToPdf") {
}
if (marker == "ToPdf") {
to_pdf = EventMap::Read(is , binary);
} else {
KALDI_ERR << "Got unexpected marker " << marker