Mirror of https://github.com/mozilla/kaldi.git
sandbox/dan2: merging changes from trunk; some further small code-level optimizations to the determinization code (which, I just realized, were done in sandbox/dan2; I'll now merge those back to trunk).
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@3087 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Parent
4da87782bc
Commit
0bc71728a2
@@ -0,0 +1,77 @@
# include common settings for limitedLP systems.
. conf/common.limitedLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.official.list
train_nj=16

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list
dev2h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=23

#Official DEV data files
dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list
dev10h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
dev10h_nj=32


#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval
eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.kwlist2.xml
eval_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=2500
numGaussTri2=36000
numLeavesTri3=2500
numGaussTri3=36000
numLeavesMLLT=2500
numGaussMLLT=36000
numLeavesSAT=2500
numGaussSAT=36000
numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"

use_ffv=true
use_pitch=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/106-tagalog/release-current/conversational/reference_materials/lexicon.txt

#keyword search settings
duptime=0.5
case_insensitive=true
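These per-language conf files are plain shell fragments: each one sources the shared defaults (conf/common.limitedLP or conf/common.fullLP) and then overrides corpus paths, job counts, and model sizes. A minimal sketch of how a run script can consume one — the conf/lang.conf name and the sanity checks below are illustrative, not part of this commit:

. conf/lang.conf || exit 1;   # one of the per-language files above
# Fail early if the corpus list is missing ($train_data_list is set by the conf file).
[ -f "$train_data_list" ] || { echo "missing $train_data_list" >&2; exit 1; }
echo "Training with $train_nj parallel jobs on data from $train_data_dir"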
@@ -0,0 +1,97 @@
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Haitian_Babel201/train.FullLP.list
train_nj=32

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20

#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Haitian_Babel201/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32

#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Haitian_Babel201/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=false
use_ffv=false
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.txt

#keyword search settings
duptime=0.5
case_insensitive=true
@@ -0,0 +1,97 @@
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Haitian_Babel201/train.FullLP.list
train_nj=32

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20

#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Haitian_Babel201/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32

#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Haitian_Babel201/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.txt

#keyword search settings
duptime=0.5
case_insensitive=true
@@ -0,0 +1,76 @@
# include common settings for limitedLP systems.
. conf/common.limitedLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Haitian_Babel201/train.LimitedLP.list
train_nj=16

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20

#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=2500
numGaussTri2=36000
numLeavesTri3=2500
numGaussTri3=36000
numLeavesMLLT=2500
numGaussMLLT=36000
numLeavesSAT=2500
numGaussSAT=36000
numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"

use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.sub-train.txt

#keyword search settings
duptime=0.5
case_insensitive=true
@ -85,8 +85,8 @@ numGaussSGMM=80000
|
|||
# Lexicon and Language Model parameters
|
||||
oovSymbol="<unk>"
|
||||
lexiconFlags="--oov <unk>"
|
||||
use_pitch=true
|
||||
use_ffv=true
|
||||
use_pitch=false
|
||||
use_ffv=false
|
||||
# Scoring protocols (dummy GLM file to appease the scoring script)
|
||||
#glmFile=./conf/glm
|
||||
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt
|
||||
|
|
|
@@ -0,0 +1,97 @@
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;

#speech corpora files location
train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Zulu_Babel206/train.FullLP.list
train_nj=32

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20

#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32

#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64

#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21

# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000

# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt

#keyword search settings
duptime=0.5
case_insensitive=true
@@ -37,6 +37,28 @@ devtrain_rttm_file=
devtrain_kwlist_file=
devtrain_nj=64

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32

#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13

#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval
eval_data_list=
@@ -74,8 +96,8 @@ numGaussSGMM=18000
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"

use_pitch=true
use_ffv=true
use_pitch=false
use_ffv=false
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt
|
@ -0,0 +1,108 @@
|
|||
# include common settings for limitedLP systems.
|
||||
. conf/common.limitedLP || exit 1;
|
||||
|
||||
#speech corpora files location
|
||||
train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
|
||||
train_data_list=/export/babel/data/splits/Zulu_Babel206/train.LimitedLP.list
|
||||
train_nj=16
|
||||
|
||||
#RADICAL DEV data files
|
||||
dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
|
||||
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
|
||||
dev2h_data_cmudb=
|
||||
dev2h_stm_file=
|
||||
dev2h_ecf_file=
|
||||
dev2h_rttm_file=
|
||||
dev2h_kwlist_file=
|
||||
dev2h_subset_ecf=true
|
||||
dev2h_nj=20
|
||||
|
||||
#Official DEV data files
|
||||
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
|
||||
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
|
||||
dev10h_data_cmudb=
|
||||
dev10h_stm_file=
|
||||
dev10h_ecf_file=
|
||||
dev10h_rttm_file=
|
||||
dev10h_kwlist_file=
|
||||
dev10h_nj=32
|
||||
|
||||
#RADICAL EVAL data files (difference between TRAIN-FULL TRAIN-LIMITED)
|
||||
devtrain_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
|
||||
devtrain_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
|
||||
devtrain_data_cmudb=
|
||||
devtrain_stm_file=
|
||||
devtrain_ecf_file=
|
||||
devtrain_rttm_file=
|
||||
devtrain_kwlist_file=
|
||||
devtrain_nj=64
|
||||
|
||||
#RADICAL DEV data files
|
||||
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
|
||||
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
|
||||
dev10h_sph_data_cmudb=
|
||||
dev10h_sph_stm_file=
|
||||
dev10h_sph_ecf_file=
|
||||
dev10h_sph_rttm_file=
|
||||
dev10h_sph_kwlist_file=
|
||||
dev10h_sph_subset_ecf=true
|
||||
dev10h_sph_nj=32
|
||||
|
||||
#RADICAL DEV data files
|
||||
dev10h_wav_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
|
||||
dev10h_wav_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
|
||||
dev10h_wav_data_cmudb=
|
||||
dev10h_wav_stm_file=
|
||||
dev10h_wav_ecf_file=
|
||||
dev10h_wav_rttm_file=
|
||||
dev10h_wav_kwlist_file=
|
||||
dev10h_wav_subset_ecf=true
|
||||
dev10h_wav_nj=13
|
||||
|
||||
#Official EVAL period evaluation data files
|
||||
eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval
|
||||
eval_data_list=
|
||||
eval_ecf_file=
|
||||
eval_kwlist_file=
|
||||
eval_data_cmudb=
|
||||
eval_nj=64
|
||||
|
||||
#Official (POST-)EVAL evaluation data portion
|
||||
evalpart1_data_dir=
|
||||
evalpart1_data_list=
|
||||
evalpart1_data_cmudb=
|
||||
evalpart1_stm_file=
|
||||
evalpart1_ecf_file=
|
||||
evalpart1_rttm_file=
|
||||
evalpart1_kwlist_file=
|
||||
evalpart1_nj=21
|
||||
|
||||
# Acoustic model parameters
|
||||
numLeavesTri1=1000
|
||||
numGaussTri1=10000
|
||||
numLeavesTri2=2500
|
||||
numGaussTri2=36000
|
||||
numLeavesTri3=2500
|
||||
numGaussTri3=36000
|
||||
numLeavesMLLT=2500
|
||||
numGaussMLLT=36000
|
||||
numLeavesSAT=2500
|
||||
numGaussSAT=36000
|
||||
numGaussUBM=750
|
||||
numLeavesSGMM=5000
|
||||
numGaussSGMM=18000
|
||||
|
||||
# Lexicon and Language Model parameters
|
||||
oovSymbol="<unk>"
|
||||
lexiconFlags="--oov <unk>"
|
||||
|
||||
use_pitch=true
|
||||
use_ffv=true
|
||||
# Scoring protocols (dummy GLM file to appease the scoring script)
|
||||
#glmFile=./conf/glm
|
||||
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt
|
||||
|
||||
#keyword search settings
|
||||
duptime=0.5
|
||||
case_insensitive=true
|
||||
|
|
@@ -15,7 +15,7 @@ dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20
dev2h_nj=18

#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
@@ -27,6 +27,16 @@ dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32

#RADICAL EVAL data files (difference between TRAIN-FULL and TRAIN-LIMITED)
devtrain_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
devtrain_data_list=/export/babel/data/splits/Zulu_Babel206/dev.train.list
devtrain_data_cmudb=
devtrain_stm_file=
devtrain_ecf_file=
devtrain_rttm_file=
devtrain_kwlist_file=
devtrain_nj=64

#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
@@ -17,9 +17,10 @@ sub KeywordSort {
}

my $Usage = <<EOU;
This script reads an alignment.csv file and computes the oracle ATWV based on the
oracle threshold. The duration of the search collection is supposed to be provided.
In the Babel case, the duration should be half of the total audio duration.
This script reads an alignment.csv file and computes the ATWV, OTWV, MTWV by
sweeping the threshold. The duration of the search collection is supposed to be
provided. In the Babel case, the duration should be half of the total audio
duration.

The alignment.csv file is supposed to have the following fields for each line:
language,file,channel,termid,term,ref_bt,ref_et,sys_bt,sys_et,sys_score,
@@ -42,7 +43,7 @@ GetOptions(

@ARGV == 1 || die $Usage;

# Workout the input/output source.
# Works out the input/output source.
my $alignment_in = shift @ARGV;

# Hash alignment file. For each instance we store a 3-dimension vector:
@@ -98,40 +99,66 @@ while (<A>) {
}
close(A);

# Work out the oracle ATWV by sweeping the threshold.
# Works out the oracle ATWV by sweeping the threshold.
my $atwv = 0.0;
my $oracle_atwv = 0.0;
my $otwv = 0.0;
my %mtwv_sweep;
foreach my $kwid (keys %keywords) {
  # Sort the instances by confidence score.
  my @instances = sort KeywordSort @{$alignment{$kwid}};
  my $local_oracle_atwv = 0.0;
  my $max_local_oracle_atwv = 0.0;
  my $local_otwv = 0.0;
  my $max_local_otwv = 0.0;
  my $local_atwv = 0.0;
  foreach my $instance (@instances) {
    my @ins = @{$instance};
    # Oracle ATWV.
    my $gain = 1.0 / $Ntrue{$kwid};
    my $cost = $beta / ($duration - $Ntrue{$kwid});
    # ATWV.
    if ($ins[1] == 1) {
      $local_oracle_atwv += 1.0 / $Ntrue{$kwid};
      $local_otwv += $gain;
    } else {
      $local_oracle_atwv -= $beta / ($duration - $Ntrue{$kwid});
      $local_otwv -= $cost;
    }
    if ($local_oracle_atwv > $max_local_oracle_atwv) {
      $max_local_oracle_atwv = $local_oracle_atwv;
    if ($local_otwv > $max_local_otwv) {
      $max_local_otwv = $local_otwv;
    }

    # Original ATWV.
    # OTWV.
    if ($ins[2] == 1) {
      $local_atwv -= $beta / ($duration - $Ntrue{$kwid});
      $local_atwv -= $cost;
    } elsif ($ins[2] == 2) {
      $local_atwv += 1.0 / $Ntrue{$kwid};
      $local_atwv += $gain;
    }

    # MTWV.
    for (my $threshold = 0.000; $threshold <= $ins[0]; $threshold += 0.001) {
      if ($ins[1] == 1) {
        $mtwv_sweep{$threshold} += $gain;
      } else {
        $mtwv_sweep{$threshold} -= $cost;
      }
    }
  }
  $atwv += $local_atwv;
  $oracle_atwv += $max_local_oracle_atwv;
  $otwv += $max_local_otwv;
}

# Works out the MTWV.
my $mtwv = 0.0;
my $mtwv_threshold = 0.0;
for my $threshold (keys %mtwv_sweep) {
  if ($mtwv_sweep{$threshold} > $mtwv) {
    $mtwv = $mtwv_sweep{$threshold};
    $mtwv_threshold = $threshold;
  }
}

$atwv /= scalar(keys %keywords);
$atwv = sprintf("%.4f", $atwv);
$oracle_atwv /= scalar(keys %keywords);
$oracle_atwv = sprintf("%.4f", $oracle_atwv);
print "Original ATWV = $atwv\n";
print "Oracle ATWV = $oracle_atwv\n";
$otwv /= scalar(keys %keywords);
$otwv = sprintf("%.4f", $otwv);
$mtwv /= scalar(keys %keywords);
$mtwv = sprintf("%.4f", $mtwv);
print "ATWV = $atwv\n";
print "OTWV = $otwv\n";
print "MTWV = $mtwv, THRESHOLD = $mtwv_threshold\n";
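For reference, the $gain and $cost variables introduced in this hunk implement the standard term-weighted value (TWV) bookkeeping. With K keywords, T the duration of the search collection, and N_true(kw) the number of reference occurrences of keyword kw (notation ours, not the script's):

\mathrm{TWV} = \frac{1}{K}\sum_{kw}\left(\frac{N_{\mathrm{hit}}(kw)}{N_{\mathrm{true}}(kw)} - \beta\,\frac{N_{\mathrm{FA}}(kw)}{T - N_{\mathrm{true}}(kw)}\right) = 1 - \overline{P}_{\mathrm{miss}} - \beta\,\overline{P}_{\mathrm{FA}}

so each hit contributes gain = 1/N_true(kw) and each false alarm costs beta/(T - N_true(kw)). ATWV scores the system's actual decisions, OTWV uses the best per-keyword threshold, and MTWV the single best global threshold, which the 0.001-step sweep above approximates.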
@@ -41,9 +41,10 @@ nnet_8m_6l/decode_eval_iter270/cer_10:%CER 25.72 [ 1945 / 7562, 405 ins, 533 del
nnet_8m_6l/decode_eval_iter280/cer_10:%CER 27.43 [ 2074 / 7562, 424 ins, 605 del, 1045 sub ]
nnet_8m_6l/decode_eval_iter290/cer_10:%CER 26.37 [ 1994 / 7562, 410 ins, 572 del, 1012 sub ]

nnet_8m_6l/decode_eval/cer_10:%CER 25.55 [ 1932 / 7562, 405 ins, 549 del, 978 sub ] # 6 layers neural network
nnet_tanh_6l/decode_eval/cer_10:%CER 21.34 [ 1614 / 7562, 369 ins, 487 del, 758 sub ] # 6 layers neural network (nnet2 script, 1024 neurons)
nnet_4m_3l/decode_eval/cer_10:%CER 22.38 [ 1692 / 7562, 372 ins, 510 del, 810 sub ] # 4 layers neural network
nnet_8m_6l/decode_eval/cer_10:%CER 25.55 [ 1932 / 7562, 405 ins, 549 del, 978 sub ] # 6 hidden layers neural network
nnet_tanh_6l/decode_eval/cer_10:%CER 21.34 [ 1614 / 7562, 369 ins, 487 del, 758 sub ] # 6 hidden layers neural network (nnet2 script, 1024 neurons)
nnet_4m_3l/decode_eval/cer_10:%CER 22.38 [ 1692 / 7562, 372 ins, 510 del, 810 sub ] # 3 hidden layers neural network
nnet_tanh_3l/decode_eval/cer_10:%CER 22.11 [ 1672 / 7562, 391 ins, 489 del, 792 sub ] # 3 hidden layers neural network (nnet2 script, 1024 neurons)

tri5a_pretrain-dbn_dnn/decode/cer_10:%CER 20.48 [ 1549 / 7562, 383 ins, 468 del, 698 sub ] # 6 layers DNN - pretrained RBM, cross entropy trained DNN
tri5a_pretrain-dbn_dnn_smbr/decode_it1/cer_10:%CER 18.73 [ 1416 / 7562, 306 ins, 453 del, 657 sub ] # sMBR trained DNN
@@ -97,6 +98,7 @@ nnet_8m_6l/decode_eval_closelm_iter290/cer_10:%CER 20.40 [ 1543 / 7562, 323 ins,
nnet_8m_6l/decode_eval_closelm/cer_10:%CER 20.68 [ 1564 / 7562, 351 ins, 483 del, 730 sub ]
nnet_tanh_6l/decode_eval_closelm/cer_10:%CER 17.10 [ 1293 / 7562, 337 ins, 448 del, 508 sub ]
nnet_4m_3l/decode_eval_closelm/cer_10:%CER 17.15 [ 1297 / 7562, 335 ins, 439 del, 523 sub ]
nnet_tanh_3l/decode_eval_closelm/cer_10:%CER 17.22 [ 1302 / 7562, 349 ins, 434 del, 519 sub ]

tri5a_pretrain-dbn_dnn/decode_closelm/cer_10:%CER 16.54 [ 1251 / 7562, 346 ins, 413 del, 492 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it1/cer_10:%CER 15.31 [ 1158 / 7562, 280 ins, 410 del, 468 sub ]
@@ -130,6 +132,7 @@ exp/sgmm_5a_mmi_b0.1/decode_wide_eval_4/cer_10:%CER 23.17 [ 1752 / 7562, 373 ins
exp/nnet_8m_6l/decode_wide_eval/cer_10:%CER 24.13 [ 1825 / 7562, 384 ins, 535 del, 906 sub ]
exp/nnet_tanh_6l/decode_wide_eval/cer_10:%CER 21.22 [ 1605 / 7562, 365 ins, 485 del, 755 sub ]
exp/nnet_4m_3l/decode_wide_eval/cer_10:%CER 22.16 [ 1676 / 7562, 365 ins, 505 del, 806 sub ]
exp/nnet_tanh_3l/decode_wide_eval/cer_10:%CER 21.95 [ 1660 / 7562, 382 ins, 488 del, 790 sub ]
exp/tri5a_pretrain-dbn_dnn/decode_dnnwide/cer_10:%CER 20.47 [ 1548 / 7562, 383 ins, 467 del, 698 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it1_dnnwide/cer_10:%CER 18.73 [ 1416 / 7562, 306 ins, 453 del, 657 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it2_dnnwide/cer_10:%CER 18.73 [ 1416 / 7562, 310 ins, 446 del, 660 sub ]
@@ -157,6 +160,7 @@ exp/sgmm_5a_mmi_b0.1/decode_wide_eval_closelm_4/cer_10:%CER 19.27 [ 1457 / 7562,
exp/nnet_8m_6l/decode_wide_eval_closelm/cer_10:%CER 17.87 [ 1351 / 7562, 343 ins, 453 del, 555 sub ]
exp/nnet_tanh_6l/decode_wide_eval_closelm/cer_10:%CER 17.15 [ 1297 / 7562, 336 ins, 452 del, 509 sub ]
exp/nnet_4m_3l/decode_wide_eval_closelm/cer_10:%CER 17.02 [ 1287 / 7562, 330 ins, 436 del, 521 sub ]
exp/nnet_tanh_3l/decode_wide_eval_closelm/cer_10:%CER 17.31 [ 1309 / 7562, 348 ins, 441 del, 520 sub ]
exp/tri5a_pretrain-dbn_dnn/decode_closelm_dnnwide/cer_10:%CER 16.42 [ 1242 / 7562, 337 ins, 414 del, 491 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it1_dnnwide/cer_10:%CER 15.26 [ 1154 / 7562, 279 ins, 409 del, 466 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2_dnnwide/cer_10:%CER 15.31 [ 1158 / 7562, 279 ins, 408 del, 471 sub ]
@@ -12,6 +12,7 @@

ulimit -u 10000

# 6 hidden layers DNN
(
steps/nnet2/train_tanh.sh \
  --mix-up 8000 \
@@ -36,3 +37,28 @@ local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_tanh_6l/decode_wid

)

# 3 hidden layers DNN
(
steps/nnet2/train_tanh.sh \
  --mix-up 8000 \
  --initial-learning-rate 0.01 --final-learning-rate 0.001 \
  --num-hidden-layers 3 --hidden-layer-dim 1024 \
  --cmd "$decode_cmd" \
  data/train data/lang exp/tri5a_ali_dt100k exp/nnet_tanh_3l || exit 1

steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval exp/nnet_tanh_3l/decode_eval &
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval exp/nnet_tanh_3l/decode_eval_closelm &

steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode_wide.config --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval exp/nnet_tanh_3l/decode_wide_eval &
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode_wide.config --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval exp/nnet_tanh_3l/decode_wide_eval_closelm &
wait


local/ext/score.sh data/eval exp/tri5a/graph exp/nnet_tanh_3l/decode_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_tanh_3l/decode_eval_closelm

local/ext/score.sh data/eval exp/tri5a/graph exp/nnet_tanh_3l/decode_wide_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_tanh_3l/decode_wide_eval_closelm

)
@@ -1,9 +1,8 @@
#!/bin/bash
#
# Copyright 2012 Vassil Panayotov
# modified from a file that was:
# Copyright 2010-2011 Microsoft Corporation

# Copyright 2010-2011 Microsoft Corporation
# Copyright 2012 Vassil Panayotov
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -54,11 +53,10 @@ cat $RMROOT/rm1/etc/rm1_test.fileids | \
# this is needed, because the original "al_sents.snr" file is not available
# (and because CMU's train utterances have tags like '<sil>' added)
cat $RMROOT/rm1/etc/rm1_train.transcription |\
 sed -e 's/\(.*\)\(([a-z][a-z][0-9]\+)\)/\1\U\2/' |\
 sed -e 's:</\?si\?l\?>::g' -e 's:([0-9])::g' |\
 sed -e 's:\([ ][ ]\+\): :g' -e 's:^[ ]\+::g' |\
 cat $RMROOT/rm1/etc/rm1_test.transcription - \
 > al_sents.snr
 tr '[a-z]' '[A-Z]' |\
 sed -E -e 's:</?S(IL)?>: :g' -e 's:\([0-9]\): :g' -e 's: +: :g' -e 's:^ +::' |\
 cat $RMROOT/rm1/etc/rm1_test.transcription - \
 > al_sents.snr

# training set
../../local/make_trans.pl trn train.flist al_sents.snr train_trans.txt train.scp
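As a quick sanity check of the rewritten normalization, here is a made-up transcription line pushed through the new pipeline (the sentence is illustrative; the commands are the ones added above):

echo "what is <sil> the ship's fuel(2) level (ae0305)" |\
 tr '[a-z]' '[A-Z]' |\
 sed -E -e 's:</?S(IL)?>: :g' -e 's:\([0-9]\): :g' -e 's: +: :g' -e 's:^ +::'
# prints: WHAT IS THE SHIP'S FUEL LEVEL (AE0305)

Everything is uppercased first (so the later patterns only need to match capitals), silence tags and single-digit repetition markers like (2) are blanked out, whitespace is squeezed, and the utterance-ID-style tag survives as (AE0305).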
@@ -4,59 +4,80 @@
# Switchboard portion of eval2000, excluding CallHome, which is
# substantially easier.

# These results are slightly out of date: since then I changed
# the LDA+MLLT to use 7, not 9 frames of context, and also increased
# the learning rate for the "indirect" fMMI.

for x in exp/{mono,tri,sgmm,nnet}*/decode*; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null
for x in exp/{mono,tri,sgmm,nnet}*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done 2>/dev/null


# These results are still partial.

exp/tri1/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 61.5 26.8 11.7 3.2 41.7 70.2 |
exp/tri1/decode_eval2000_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 61.1 27.3 11.6 3.5 42.3 70.3 |
exp/tri2/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 62.8 26.1 11.0 3.2 40.3 70.1 |
exp/tri2/decode_eval2000_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 62.7 26.9 10.3 3.7 40.9 70.5 |
exp/tri3a/decode_eval2000_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 69.7 21.6 8.6 3.2 33.5 68.0 |
exp/tri3a/decode_eval2000_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 69.3 22.0 8.7 3.4 34.1 67.3 |
exp/tri3b/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 74.3 18.0 7.6 2.9 28.6 65.6 |
exp/tri3b/decode_eval2000_sw1_fsh_tgpr_newcode/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 74.3 18.0 7.6 2.9 28.6 65.6 |
exp/tri3b/decode_eval2000_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 74.0 18.7 7.3 3.0 29.0 66.5 |
exp/tri3b/decode_eval2000_sw1_tg_newcode/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 74.0 18.7 7.3 3.0 29.0 66.5 |
exp/tri4a/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 78.4 15.2 6.3 2.6 24.1 61.4 |
exp/tri4a/decode_eval2000_sw1_fsh_tgpr.si/score_11/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 71.8 20.7 7.5 3.6 31.8 67.4 |
exp/tri4a/decode_eval2000_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 78.1 15.6 6.3 2.7 24.6 61.7 |
exp/tri4a/decode_eval2000_sw1_tg.si/score_11/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 71.3 21.2 7.5 3.8 32.5 67.7 |
exp/tri4b/decode_eval2000_sw1_fsh_tgpr/score_16/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 80.4 13.3 6.2 2.1 21.7 60.0 |
exp/tri4b/decode_eval2000_sw1_fsh_tgpr.si/score_13/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 73.7 18.9 7.4 3.0 29.3 65.9 |
exp/tri4b/decode_eval2000_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 80.1 14.1 5.8 2.5 22.4 60.8 |
exp/tri4b/decode_eval2000_sw1_tg.si/score_12/eval2000.ctm.swbd.filt.sys: | Sum/Avg | 1831 21395 | 73.6 19.3 7.1 3.6 30.0 66.2 |
%WER 37.2 | 1831 21395 | 65.9 24.1 10.0 3.1 37.2 67.8 | exp/tri1/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys
%WER 37.4 | 1831 21395 | 65.9 24.1 10.0 3.4 37.4 67.9 | exp/tri1/decode_eval2000_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 36.8 | 1831 21395 | 66.5 23.8 9.7 3.3 36.8 68.3 | exp/tri2/decode_eval2000_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys
%WER 37.0 | 1831 21395 | 66.6 24.1 9.2 3.7 37.0 68.4 | exp/tri2/decode_eval2000_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 29.1 | 1831 21395 | 74.0 18.3 7.7 3.1 29.1 65.4 | exp/tri3b/decode_eval2000_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 29.7 | 1831 21395 | 73.6 18.6 7.7 3.3 29.7 65.3 | exp/tri3b/decode_eval2000_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 24.3 | 1831 21395 | 77.9 15.0 7.1 2.3 24.3 61.5 | exp/tri4a/decode_eval2000_sw1_fsh_tgpr/score_17/eval2000.ctm.swbd.filt.sys
%WER 32.6 | 1831 21395 | 71.2 21.4 7.4 3.8 32.6 66.9 | exp/tri4a/decode_eval2000_sw1_fsh_tgpr.si/score_12/eval2000.ctm.swbd.filt.sys
%WER 25.0 | 1831 21395 | 77.7 15.6 6.6 2.8 25.0 62.4 | exp/tri4a/decode_eval2000_sw1_tg/score_15/eval2000.ctm.swbd.filt.sys
%WER 33.2 | 1831 21395 | 70.8 21.7 7.5 4.0 33.2 67.1 | exp/tri4a/decode_eval2000_sw1_tg.si/score_12/eval2000.ctm.swbd.filt.sys
%WER 23.5 | 1831 21395 | 79.0 14.7 6.3 2.5 23.5 61.3 | exp/tri4a_fmmi_b0.1/decode_eval2000_it4_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys
%WER 23.7 | 1831 21395 | 78.7 14.8 6.5 2.4 23.7 62.0 | exp/tri4a_fmmi_b0.1/decode_eval2000_it4_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 22.1 | 1831 21395 | 80.0 13.3 6.7 2.1 22.1 60.1 | exp/tri4a_fmmi_b0.1/decode_eval2000_it5_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 22.6 | 1831 21395 | 79.5 13.9 6.6 2.2 22.6 60.6 | exp/tri4a_fmmi_b0.1/decode_eval2000_it5_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 21.9 | 1831 21395 | 80.6 13.8 5.6 2.5 21.9 59.3 | exp/tri4a_fmmi_b0.1/decode_eval2000_it6_sw1_fsh_tgpr/score_11/eval2000.ctm.swbd.filt.sys
%WER 22.2 | 1831 21395 | 80.2 13.9 5.9 2.4 22.2 60.2 | exp/tri4a_fmmi_b0.1/decode_eval2000_it6_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 21.5 | 1831 21395 | 80.9 13.5 5.6 2.5 21.5 59.2 | exp/tri4a_fmmi_b0.1/decode_eval2000_it7_sw1_fsh_tgpr/score_11/eval2000.ctm.swbd.filt.sys
%WER 21.8 | 1831 21395 | 80.7 13.7 5.5 2.5 21.8 59.7 | exp/tri4a_fmmi_b0.1/decode_eval2000_it7_sw1_tg/score_11/eval2000.ctm.swbd.filt.sys
%WER 21.3 | 1831 21395 | 81.2 13.3 5.5 2.6 21.3 59.1 | exp/tri4a_fmmi_b0.1/decode_eval2000_it8_sw1_fsh_tgpr/score_11/eval2000.ctm.swbd.filt.sys
%WER 21.7 | 1831 21395 | 80.7 13.4 5.9 2.4 21.7 59.6 | exp/tri4a_fmmi_b0.1/decode_eval2000_it8_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 23.0 | 1831 21395 | 79.2 14.2 6.5 2.2 23.0 60.5 | exp/tri4a_mmi_b0.1/decode_eval2000_1.mdl_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 23.5 | 1831 21395 | 79.0 14.8 6.2 2.5 23.5 60.8 | exp/tri4a_mmi_b0.1/decode_eval2000_1.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 22.3 | 1831 21395 | 79.8 13.7 6.5 2.1 22.3 59.4 | exp/tri4a_mmi_b0.1/decode_eval2000_2.mdl_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 22.8 | 1831 21395 | 79.5 14.3 6.2 2.3 22.8 60.0 | exp/tri4a_mmi_b0.1/decode_eval2000_2.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 22.0 | 1831 21395 | 80.4 13.8 5.8 2.3 22.0 59.3 | exp/tri4a_mmi_b0.1/decode_eval2000_3.mdl_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 22.4 | 1831 21395 | 79.9 13.9 6.2 2.3 22.4 59.4 | exp/tri4a_mmi_b0.1/decode_eval2000_3.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 21.7 | 1831 21395 | 80.6 13.6 5.8 2.3 21.7 59.0 | exp/tri4a_mmi_b0.1/decode_eval2000_4.mdl_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 22.1 | 1831 21395 | 80.3 13.9 5.8 2.5 22.1 59.4 | exp/tri4a_mmi_b0.1/decode_eval2000_4.mdl_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 21.8 | 1831 21395 | 80.5 13.7 5.8 2.3 21.8 59.3 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 29.3 | 1831 21395 | 74.1 18.8 7.0 3.4 29.3 64.8 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr.si/score_13/eval2000.ctm.swbd.filt.sys
%WER 22.4 | 1831 21395 | 80.0 13.9 6.1 2.4 22.4 60.0 | exp/tri4b/decode_eval2000_sw1_tg/score_16/eval2000.ctm.swbd.filt.sys
%WER 30.3 | 1831 21395 | 73.1 19.7 7.1 3.4 30.3 64.7 | exp/tri4b/decode_eval2000_sw1_tg.si/score_14/eval2000.ctm.swbd.filt.sys
%WER 20.7 | 1831 21395 | 81.3 12.8 6.0 2.0 20.7 59.3 | exp/tri4b_fmmi_b0.1/decode_eval2000_it4_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 21.4 | 1831 21395 | 81.0 13.2 5.8 2.4 21.4 59.3 | exp/tri4b_fmmi_b0.1/decode_eval2000_it4_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 19.6 | 1831 21395 | 82.2 12.0 5.8 1.9 19.6 57.2 | exp/tri4b_fmmi_b0.1/decode_eval2000_it5_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 20.2 | 1831 21395 | 81.9 12.5 5.6 2.1 20.2 56.9 | exp/tri4b_fmmi_b0.1/decode_eval2000_it5_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 19.4 | 1831 21395 | 82.7 12.0 5.3 2.2 19.4 56.9 | exp/tri4b_fmmi_b0.1/decode_eval2000_it6_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 19.9 | 1831 21395 | 82.1 12.2 5.6 2.0 19.9 57.0 | exp/tri4b_fmmi_b0.1/decode_eval2000_it6_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 19.3 | 1831 21395 | 82.9 12.0 5.2 2.1 19.3 56.4 | exp/tri4b_fmmi_b0.1/decode_eval2000_it7_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 19.8 | 1831 21395 | 82.4 12.3 5.3 2.2 19.8 56.6 | exp/tri4b_fmmi_b0.1/decode_eval2000_it7_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 19.3 | 1831 21395 | 82.9 11.9 5.2 2.2 19.3 56.5 | exp/tri4b_fmmi_b0.1/decode_eval2000_it8_sw1_fsh_tgpr/score_12/eval2000.ctm.swbd.filt.sys
%WER 19.7 | 1831 21395 | 82.5 12.3 5.2 2.2 19.7 56.7 | exp/tri4b_fmmi_b0.1/decode_eval2000_it8_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 20.2 | 1831 21395 | 81.6 12.4 5.9 1.9 20.2 57.6 | exp/tri4b_mmi_b0.1/decode_eval2000_1.mdl_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 20.7 | 1831 21395 | 81.4 12.8 5.7 2.1 20.7 57.9 | exp/tri4b_mmi_b0.1/decode_eval2000_1.mdl_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys
%WER 19.7 | 1831 21395 | 82.2 12.1 5.7 1.9 19.7 57.3 | exp/tri4b_mmi_b0.1/decode_eval2000_2.mdl_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 20.3 | 1831 21395 | 81.9 12.6 5.5 2.2 20.3 57.8 | exp/tri4b_mmi_b0.1/decode_eval2000_2.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 19.5 | 1831 21395 | 82.5 12.0 5.5 2.0 19.5 56.1 | exp/tri4b_mmi_b0.1/decode_eval2000_3.mdl_sw1_fsh_tgpr/score_13/eval2000.ctm.swbd.filt.sys
%WER 20.0 | 1831 21395 | 82.0 12.4 5.5 2.1 20.0 56.8 | exp/tri4b_mmi_b0.1/decode_eval2000_3.mdl_sw1_tg/score_13/eval2000.ctm.swbd.filt.sys
%WER 19.5 | 1831 21395 | 82.7 12.5 4.8 2.3 19.5 56.4 | exp/tri4b_mmi_b0.1/decode_eval2000_4.mdl_sw1_fsh_tgpr/score_11/eval2000.ctm.swbd.filt.sys
%WER 19.9 | 1831 21395 | 82.3 12.5 5.2 2.2 19.9 56.7 | exp/tri4b_mmi_b0.1/decode_eval2000_4.mdl_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 23.4 | 1831 21395 | 79.2 13.6 7.3 2.6 23.4 62.8 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 30.4 | 1831 21395 | 73.0 18.7 8.3 3.4 30.4 68.1 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr.si/score_13/eval2000.ctm.swbd.filt.sys
%WER 23.6 | 1831 21395 | 78.9 13.6 7.5 2.5 23.6 62.8 | exp/tri4c_reseg/decode_eval2000_sw1_tg/score_16/eval2000.ctm.swbd.filt.sys
%WER 31.0 | 1831 21395 | 72.7 19.0 8.3 3.7 31.0 68.5 | exp/tri4c_reseg/decode_eval2000_sw1_tg.si/score_13/eval2000.ctm.swbd.filt.sys
%WER 21.1 | 1831 21395 | 81.2 12.8 6.0 2.3 21.1 59.7 | exp/sgmm2_5a/decode_eval2000_sw1_fsh_tgpr/score_10/eval2000.ctm.swbd.filt.sys
%WER 21.3 | 1831 21395 | 80.9 13.0 6.2 2.2 21.3 59.5 | exp/sgmm2_5a/decode_eval2000_sw1_tg/score_11/eval2000.ctm.swbd.filt.sys
%WER 19.7 | 1831 21395 | 82.4 12.0 5.6 2.2 19.7 57.9 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_fsh_tgpr_it1/score_10/eval2000.ctm.swbd.filt.sys
%WER 19.1 | 1831 21395 | 82.8 11.5 5.7 1.9 19.1 56.7 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_fsh_tgpr_it2/score_11/eval2000.ctm.swbd.filt.sys
%WER 19.0 | 1831 21395 | 83.2 11.5 5.3 2.1 19.0 56.9 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_fsh_tgpr_it3/score_10/eval2000.ctm.swbd.filt.sys
%WER 18.9 | 1831 21395 | 83.3 11.6 5.1 2.2 18.9 56.9 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_fsh_tgpr_it4/score_10/eval2000.ctm.swbd.filt.sys
%WER 20.4 | 1831 21395 | 81.9 12.4 5.7 2.3 20.4 57.8 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_10/eval2000.ctm.swbd.filt.sys
%WER 19.8 | 1831 21395 | 82.5 12.1 5.4 2.3 19.8 57.3 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_10/eval2000.ctm.swbd.filt.sys
%WER 19.5 | 1831 21395 | 82.8 12.0 5.3 2.3 19.5 56.9 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_10/eval2000.ctm.swbd.filt.sys
%WER 19.5 | 1831 21395 | 82.9 12.0 5.1 2.4 19.5 56.5 | exp/sgmm2_5a_mmi_b0.1/decode_eval2000_sw1_tg_it4/score_10/eval2000.ctm.swbd.filt.sys


# some more recent results (Sep 25 2013), from tri4b and tri4c_reseg, to
# see the effect of resegmentation. Note: we're only looking at the "swbd" results here,
# the callhome results or total results are terrible because of huge insertions, because
# it seems that only some segments of the audio files are in the stm. I'm not sure
# where to get the start and end points in the files that they intended us to
# decode.
%WER 22.2 | 1831 21395 | 80.3 13.8 5.9 2.5 22.2 60.1 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
%WER 29.3 | 1831 21395 | 73.5 18.7 7.8 2.9 29.3 65.0 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr.si/score_17/eval2000.ctm.swbd.filt.sys
%WER 22.5 | 1831 21395 | 79.8 13.8 6.4 2.3 22.5 60.3 | exp/tri4b/decode_eval2000_sw1_tg/score_17/eval2000.ctm.swbd.filt.sys
%WER 30.5 | 1831 21395 | 73.1 19.8 7.1 3.6 30.5 65.8 | exp/tri4b/decode_eval2000_sw1_tg.si/score_14/eval2000.ctm.swbd.filt.sys

%WER 22.9 | 1831 21395 | 79.7 13.4 6.9 2.6 22.9 62.8 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
%WER 29.6 | 1831 21395 | 73.8 18.2 8.1 3.4 29.6 66.8 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr.si/score_13/eval2000.ctm.swbd.filt.sys
%WER 23.5 | 1831 21395 | 79.1 13.8 7.1 2.6 23.5 63.6 | exp/tri4c_reseg/decode_eval2000_sw1_tg/score_15/eval2000.ctm.swbd.filt.sys
%WER 30.9 | 1831 21395 | 73.1 19.0 7.9 4.0 30.9 67.6 | exp/tri4c_reseg/decode_eval2000_sw1_tg.si/score_12/eval2000.ctm.swbd.filt.sys
# so the resegmented one is about 0.3 to 1.0 worse, but the #sub is actually down; it's due to more deletions
# and insertions. This is kind of what we'd expect, since the reference segmentation is a kind of "oracle".

# below are some results where I kept the segments that the segmentation
# regarded as noise (e.g. cough, etc.). Results after adaptation almost identical, but
# 0.1% better with the switchboard-only LM.

%WER 22.9 | 1831 21395 | 79.7 13.4 6.9 2.6 22.9 62.6 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_fsh_tgpr/score_14/eval2000_with_noise.ctm.swbd.filt.sys
%WER 29.7 | 1831 21395 | 73.6 18.3 8.0 3.4 29.7 67.0 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_fsh_tgpr.si/score_13/eval2000_with_noise.ctm.swbd.filt.sys
%WER 23.6 | 1831 21395 | 79.1 14.0 6.9 2.8 23.6 64.2 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_tg/score_14/eval2000_with_noise.ctm.swbd.filt.sys
%WER 30.8 | 1831 21395 | 72.9 19.0 8.1 3.7 30.8 67.5 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_tg.si/score_13/eval2000_with_noise.ctm.swbd.filt.sys
%WER 29.10 [ 14382 / 49427, 1963 ins, 3394 del, 9025 sub ] exp/tri4b/decode_train_dev_sw1_fsh_tgpr/wer_15
%WER 37.81 [ 18686 / 49427, 2078 ins, 4625 del, 11983 sub ] exp/tri4b/decode_train_dev_sw1_fsh_tgpr.si/wer_15
%WER 29.53 [ 14598 / 49427, 1885 ins, 3538 del, 9175 sub ] exp/tri4b/decode_train_dev_sw1_tg/wer_16
%WER 38.42 [ 18990 / 49427, 2154 ins, 4461 del, 12375 sub ] exp/tri4b/decode_train_dev_sw1_tg.si/wer_15
@@ -0,0 +1,2 @@
beam=13.0 # beam for decoding. Was 13.0 in the scripts.
latbeam=8.0 # this has most effect on size of the lattices.
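These two values form a decode config that other scripts pick up via --config; the make_denlats_nnet.sh calls just below pass it explicitly (assuming this hunk is the conf/decode_dnn.config they reference):

steps/make_denlats_nnet.sh --nj 10 --sub-split 100 --cmd "$decode_cmd" \
  --config conf/decode_dnn.config --acwt $acwt \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_denlats_all || exit 1;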
@@ -116,7 +116,7 @@ acwt=0.08333
{
steps/align_nnet.sh --nj 250 --cmd "$train_cmd" \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_ali_all || exit 1;
steps/make_denlats_nnet.sh --nj 250 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
steps/make_denlats_nnet.sh --nj 10 --sub-split 100 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_denlats_all || exit 1;
}
# Now we re-train the hybrid by single iteration of sMBR
@@ -156,7 +156,7 @@ acwt=0.08333
{
steps/align_nnet.sh --nj 250 --cmd "$train_cmd" \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_ali_all || exit 1;
steps/make_denlats_nnet.sh --nj 250 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
steps/make_denlats_nnet.sh --nj 10 --sub-split 100 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
  data-fmllr-tri4b/train_nodup data/lang $srcdir ${srcdir}_denlats_all || exit 1;
}
# Now we re-train the hybrid by several iterations of sMBR
@@ -13,8 +13,10 @@

. cmd.sh
. path.sh
set -e # exit on error

local/swbd1_data_prep.sh /export/corpora3/LDC/LDC97S62
local/swbd1_data_prep.sh /home/dpovey/data/LDC97S62
# local/swbd1_data_prep.sh /home/dpovey/data/LDC97S62
# local/swbd1_data_prep.sh /data/corpora0/LDC97S62
# local/swbd1_data_prep.sh /mnt/matylda2/data/SWITCHBOARD_1R2
# local/swbd1_data_prep.sh /exports/work/inf_hcrc_cstr_general/corpora/switchboard/switchboard1
@@ -29,9 +31,12 @@ utils/prepare_lang.sh data/local/dict "<unk>" data/local/lang data/lang

# If you have the Fisher data, you can set this "fisher_opt" variable.
fisher_opt="--fisher /export/corpora3/LDC/LDC2004T19/fe_03_p1_tran/"
#fisher_opt="--fisher /home/dpovey/data/LDC2004T19/fe_03_p1_tran/"
#fisher_opt="--fisher /data/corpora0/LDC2004T19/fe_03_p1_tran/"
# edinburgh:
# fisher_opt="--fisher /exports/work/inf_hcrc_cstr_general/corpora/fisher/transcripts"
# brno:
# fisher_opt="--fisher /mnt/matylda2/data/FISHER/fe_03_p1_tran" # BUT
local/swbd1_train_lms.sh $fisher_opt \
  data/local/train/text data/local/dict/lexicon.txt data/local/lm
# We don't really need all these options for SRILM, since the LM training script
@@ -48,7 +53,7 @@ utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \
# For some funny reason we are still using IRSTLM for doing LM pruning :)
export PATH=$PATH:../../../tools/irstlm/bin/
prune-lm --threshold=1e-7 data/local/lm/sw1_fsh.o3g.kn.gz /dev/stdout \
  | gzip -c > data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz
  | gzip -c > data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz || exit 1
LM=data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz
utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \
  data/lang $LM data/local/dict/lexicon.txt data/lang_sw1_fsh_tgpr
@@ -61,23 +66,24 @@ utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \
# local/eval2000_data_prep.sh /data/corpora0/LDC2002S09/hub5e_00 /data/corpora0/LDC2002T43
# local/eval2000_data_prep.sh /mnt/matylda2/data/HUB5_2000/ /mnt/matylda2/data/HUB5_2000/2000_hub5_eng_eval_tr
# local/eval2000_data_prep.sh /exports/work/inf_hcrc_cstr_general/corpora/switchboard/hub5/2000 /exports/work/inf_hcrc_cstr_general/corpora/switchboard/hub5/2000/transcr
# local/eval2000_data_prep.sh /home/dpovey/data/LDC2002S09/hub5e_00 /home/dpovey/data/LDC2002T43
local/eval2000_data_prep.sh /export/corpora2/LDC/LDC2002S09/hub5e_00 /export/corpora2/LDC/LDC2002T43

# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=mfcc

steps/make_mfcc.sh --compress true --nj 20 --cmd "$train_cmd" data/train exp/make_mfcc/train $mfccdir || exit 1;
steps/make_mfcc.sh --compress true --nj 20 --cmd "$train_cmd" data/train exp/make_mfcc/train $mfccdir
steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train $mfccdir

# Remove the small number of utterances that couldn't be extracted for some
# reason (e.g. too short; no such file).
utils/fix_data_dir.sh data/train || exit 1;
utils/fix_data_dir.sh data/train

# Create MFCCs for the eval set
steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/eval2000 exp/make_mfcc/eval2000 $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/eval2000 exp/make_mfcc/eval2000 $mfccdir || exit 1;
utils/fix_data_dir.sh data/eval2000 || exit 1 # remove segments with problems
steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/eval2000 exp/make_mfcc/eval2000 $mfccdir
steps/compute_cmvn_stats.sh data/eval2000 exp/make_mfcc/eval2000 $mfccdir
utils/fix_data_dir.sh data/eval2000 # remove segments with problems

# Use the first 4k sentences as dev set. Note: when we trained the LM, we used
# the 1st 10k sentences as dev set, so the 1st 4k won't have been used in the
@@ -114,13 +120,13 @@ local/remove_dup_utts.sh 300 data/train_nodev data/train_nodup # 286hr

## Starting basic training on MFCC features
steps/train_mono.sh --nj 10 --cmd "$train_cmd" \
  data/train_10k_nodup data/lang exp/mono || exit 1;
  data/train_10k_nodup data/lang exp/mono

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train_30k_nodup data/lang exp/mono exp/mono_ali || exit 1;
  data/train_30k_nodup data/lang exp/mono exp/mono_ali

steps/train_deltas.sh --cmd "$train_cmd" \
  3200 30000 data/train_30k_nodup data/lang exp/mono_ali exp/tri1 || exit 1;
  3200 30000 data/train_30k_nodup data/lang exp/mono_ali exp/tri1

for lm_suffix in tg fsh_tgpr; do
  (
@@ -133,10 +139,10 @@ for lm_suffix in tg fsh_tgpr; do
done

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train_30k_nodup data/lang exp/tri1 exp/tri1_ali || exit 1;
  data/train_30k_nodup data/lang exp/tri1 exp/tri1_ali

steps/train_deltas.sh --cmd "$train_cmd" \
  3200 30000 data/train_30k_nodup data/lang exp/tri1_ali exp/tri2 || exit 1;
  3200 30000 data/train_30k_nodup data/lang exp/tri1_ali exp/tri2


for lm_suffix in tg fsh_tgpr; do
@@ -156,11 +162,11 @@ done
# From now, we start building a bigger system (on train_100k_nodup, which has
# 110hrs of data). We start with the LDA+MLLT system
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri2 exp/tri2_ali_100k_nodup || exit 1;
  data/train_100k_nodup data/lang exp/tri2 exp/tri2_ali_100k_nodup

# Train tri3b, which is LDA+MLLT, on 100k_nodup data.
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  5500 90000 data/train_100k_nodup data/lang exp/tri2_ali_100k_nodup exp/tri3b || exit 1;
  5500 90000 data/train_100k_nodup data/lang exp/tri2_ali_100k_nodup exp/tri3b

for lm_suffix in tg fsh_tgpr; do
  (
@@ -174,12 +180,12 @@ done

# Train tri4a, which is LDA+MLLT+SAT, on 100k_nodup data.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri3b exp/tri3b_ali_100k_nodup || exit 1;
  data/train_100k_nodup data/lang exp/tri3b exp/tri3b_ali_100k_nodup


steps/train_sat.sh --cmd "$train_cmd" \
  5500 90000 data/train_100k_nodup data/lang exp/tri3b_ali_100k_nodup \
  exp/tri4a || exit 1;
  exp/tri4a

for lm_suffix in tg fsh_tgpr; do
  (
@@ -198,11 +204,11 @@ done
# 286 hours)
# Train tri4b, which is LDA+MLLT+SAT, on train_nodup data.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train_nodup data/lang exp/tri3b exp/tri3b_ali_nodup || exit 1;
  data/train_nodup data/lang exp/tri3b exp/tri3b_ali_nodup


steps/train_sat.sh --cmd "$train_cmd" \
  11500 200000 data/train_nodup data/lang exp/tri3b_ali_nodup exp/tri4b || exit 1;
  11500 200000 data/train_nodup data/lang exp/tri3b_ali_nodup exp/tri4b

for lm_suffix in tg fsh_tgpr; do
  (
@@ -215,7 +221,9 @@ for lm_suffix in tg fsh_tgpr; do
      $graph_dir data/train_dev exp/tri4b/decode_train_dev_sw1_${lm_suffix}
  ) &
done

wait
steps/lmrescore.sh --mode 3 --cmd "$mkgraph_cmd" data/lang_sw1_fsh_tgpr data/lang_sw1_fsh_tg data/eval2000 \
  exp/tri4b/decode_eval2000_sw1_fsh_tgpr exp/tri4b/decode_eval2000_sw1_fsh_tg.3 || exit 1


# MMI training starting from the LDA+MLLT+SAT systems on both the
@@ -229,11 +237,11 @@ steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
steps/make_denlats.sh --nj 50 --cmd "$decode_cmd" --config conf/decode.config \
  --transform-dir exp/tri4a_ali_100k_nodup \
  data/train_100k_nodup data/lang exp/tri4a exp/tri4a_denlats_100k_nodup \
  || exit 1;


steps/make_denlats.sh --nj 100 --cmd "$decode_cmd" --config conf/decode.config \
  --transform-dir exp/tri4b_ali_nodup \
  data/train_nodup data/lang exp/tri4b exp/tri4b_denlats_nodup || exit 1;
  data/train_nodup data/lang exp/tri4b exp/tri4b_denlats_nodup

# 4 iterations of MMI seems to work well overall. The number of iterations is
# used as an explicit argument even though train_mmi.sh will use 4 iterations by
@@ -241,11 +249,11 @@ steps/make_denlats.sh --nj 100 --cmd "$decode_cmd" --config conf/decode.config \
num_mmi_iters=4
steps/train_mmi.sh --cmd "$decode_cmd" --boost 0.1 --num-iters $num_mmi_iters \
  data/train_100k_nodup data/lang exp/tri4a_{ali,denlats}_100k_nodup \
  exp/tri4a_mmi_b0.1 || exit 1;
  exp/tri4a_mmi_b0.1

steps/train_mmi.sh --cmd "$decode_cmd" --boost 0.1 --num-iters $num_mmi_iters \
  data/train_nodup data/lang exp/tri4b_{ali,denlats}_nodup \
  exp/tri4b_mmi_b0.1 || exit 1;
  exp/tri4b_mmi_b0.1

for iter in 1 2 3 4; do
  for lm_suffix in tg fsh_tgpr; do
@@ -283,11 +291,11 @@ steps/train_diag_ubm.sh --silence-weight 0.5 --nj 100 --cmd "$train_cmd" \

steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri4a_ali_100k_nodup exp/tri4a_dubm \
  exp/tri4a_denlats_100k_nodup exp/tri4a_fmmi_b0.1 || exit 1;
  exp/tri4a_denlats_100k_nodup exp/tri4a_fmmi_b0.1

steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
  data/train_nodup data/lang exp/tri4b_ali_nodup exp/tri4b_dubm \
  exp/tri4b_denlats_nodup exp/tri4b_fmmi_b0.1 || exit 1;
  exp/tri4b_denlats_nodup exp/tri4b_fmmi_b0.1

for iter in 4 5 6 7 8; do
  for lm_suffix in tg fsh_tgpr; do
@@ -1,28 +1,76 @@
for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done

# Use caution when comparing these results with other published results.
Training Set   : Timit training set (4620 sentences)
Test Set       : Timit test set (1680 sentences)
Training Set   : 3696 sentences
Dev Set        : 400 sentences
Test Set       : 192 sentences
Language Model : Bigram phoneme language model which is extracted from training set

# monophone, deltas.
%PER 28.94 [ 18201 / 62901, 1598 ins, 5644 del, 10959 sub ] exp/mono/decode_bg_test/wer_4
---------------------------------Dev Set------------------------------------------
%WER 33.53 [ 5048 / 15057, 397 ins, 1674 del, 2977 sub ] exp/mono/decode_dev/wer_3
--------------------------------Test Set------------------------------------------
%WER 34.77 [ 2509 / 7215, 193 ins, 826 del, 1490 sub ] exp/mono/decode_test/wer_3
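# (How to read these lines -- a gloss added here, not part of the original
# RESULTS file: %WER = 100 * (ins + del + sub) / N, e.g. for the monophone
# dev result above, 100 * (397 + 1674 + 2977) / 15057 = 33.53.)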

# tri1 : first triphone system (delta+delta-delta features)
%PER 22.60 [ 14215 / 62901, 1796 ins, 3466 del, 8953 sub ] exp/tri1/decode_bg_test/wer_8
---------------------------------Dev Set------------------------------------------
%WER 29.26 [ 4405 / 15057, 541 ins, 1281 del, 2583 sub ] exp/tri1/decode_dev/wer_6
--------------------------------Test Set------------------------------------------
%WER 30.53 [ 2203 / 7215, 259 ins, 654 del, 1290 sub ] exp/tri1/decode_test/wer_6

#tri2 : an LDA+MLLT system
---------------------------------Dev Set------------------------------------------
%WER 26.38 [ 3972 / 15057, 421 ins, 1269 del, 2282 sub ] exp/tri2/decode_dev/wer_7
--------------------------------Test Set------------------------------------------
%WER 28.41 [ 2050 / 7215, 220 ins, 664 del, 1166 sub ] exp/tri2/decode_test/wer_7

#tri2 : an LDA+MLLT system.
%PER 20.36 [ 12807 / 62901, 1872 ins, 2914 del, 8021 sub ] exp/tri2/decode_bg_test/wer_7

#tri3 : Speaker Adaptive Training (SAT) system
%PER 18.27 [ 11489 / 62901, 1681 ins, 2810 del, 6998 sub ] exp/tri3/decode_bg_test/wer_6
---------------------------------Dev Set------------------------------------------
%WER 23.36 [ 3517 / 15057, 464 ins, 1001 del, 2052 sub ] exp/tri3/decode_dev/wer_4
%WER 26.53 [ 3995 / 15057, 394 ins, 1289 del, 2312 sub ] exp/tri3/decode_dev.si/wer_7
--------------------------------Test Set------------------------------------------
%WER 24.96 [ 1801 / 7215, 245 ins, 529 del, 1027 sub ] exp/tri3/decode_test/wer_4
%WER 27.96 [ 2017 / 7215, 214 ins, 650 del, 1153 sub ] exp/tri3/decode_test.si/wer_7

#SGMM2 Training
%PER 16.17 [ 10171 / 62901, 1309 ins, 2708 del, 6154 sub ] exp/sgmm2_4/decode_bg_test/wer_6

# SGMM2 + MMI Training
%PER 16.14 [ 10154 / 62901, 1845 ins, 2074 del, 6235 sub ] exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it1/wer_6
%PER 16.58 [ 10430 / 62901, 2032 ins, 2031 del, 6367 sub ] exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it2/wer_7
%PER 16.80 [ 10570 / 62901, 2071 ins, 2096 del, 6403 sub ] exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it3/wer_8
%PER 17.02 [ 10706 / 62901, 2154 ins, 2048 del, 6504 sub ] exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it4/wer_8
#SGMM2 Training :
---------------------------------Dev Set------------------------------------------
%WER 20.66 [ 3111 / 15057, 347 ins, 1022 del, 1742 sub ] exp/sgmm2_4/decode_dev/wer_5
--------------------------------Test Set------------------------------------------
%WER 22.88 [ 1651 / 7215, 189 ins, 519 del, 943 sub ] exp/sgmm2_4/decode_test/wer_5

# SGMM2 + MMI Training :
---------------------------------Dev Set------------------------------------------
%WER 20.48 [ 3084 / 15057, 450 ins, 849 del, 1785 sub ] exp/sgmm2_4_mmi_b0.1/decode_dev_it1/wer_5
%WER 20.20 [ 3042 / 15057, 508 ins, 740 del, 1794 sub ] exp/sgmm2_4_mmi_b0.1/decode_dev_it2/wer_5
%WER 20.36 [ 3065 / 15057, 548 ins, 711 del, 1806 sub ] exp/sgmm2_4_mmi_b0.1/decode_dev_it3/wer_5
%WER 20.40 [ 3071 / 15057, 506 ins, 762 del, 1803 sub ] exp/sgmm2_4_mmi_b0.1/decode_dev_it4/wer_6
--------------------------------Test Set------------------------------------------
%WER 22.66 [ 1635 / 7215, 250 ins, 420 del, 965 sub ] exp/sgmm2_4_mmi_b0.1/decode_test_it1/wer_5
%WER 22.44 [ 1619 / 7215, 282 ins, 384 del, 953 sub ] exp/sgmm2_4_mmi_b0.1/decode_test_it2/wer_5
%WER 22.62 [ 1632 / 7215, 298 ins, 369 del, 965 sub ] exp/sgmm2_4_mmi_b0.1/decode_test_it3/wer_5
%WER 22.48 [ 1622 / 7215, 277 ins, 386 del, 959 sub ] exp/sgmm2_4_mmi_b0.1/decode_test_it4/wer_6

# Hybrid System :
---------------------------------Dev Set------------------------------------------
%WER 22.77 [ 3429 / 15057, 411 ins, 1057 del, 1961 sub ] exp/tri4_nnet/decode_dev/wer_3
--------------------------------Test Set------------------------------------------
%WER 24.84 [ 1792 / 7215, 197 ins, 579 del, 1016 sub ] exp/tri4_nnet/decode_test/wer_2

# Combination :
---------------------------------Dev Set------------------------------------------
%WER 20.26 [ 3051 / 15057, 371 ins, 937 del, 1743 sub ] exp/combine_2/decode_dev_it1/wer_4
%WER 19.91 [ 2998 / 15057, 397 ins, 870 del, 1731 sub ] exp/combine_2/decode_dev_it2/wer_4
%WER 19.75 [ 2974 / 15057, 422 ins, 825 del, 1727 sub ] exp/combine_2/decode_dev_it3/wer_4
%WER 19.79 [ 2980 / 15057, 373 ins, 886 del, 1721 sub ] exp/combine_2/decode_dev_it4/wer_5
--------------------------------Test Set------------------------------------------
%WER 21.90 [ 1580 / 7215, 191 ins, 474 del, 915 sub ] exp/combine_2/decode_test_it1/wer_4
%WER 21.73 [ 1568 / 7215, 218 ins, 442 del, 908 sub ] exp/combine_2/decode_test_it2/wer_4
%WER 21.62 [ 1560 / 7215, 223 ins, 423 del, 914 sub ] exp/combine_2/decode_test_it3/wer_4
%WER 21.68 [ 1564 / 7215, 197 ins, 476 del, 891 sub ] exp/combine_2/decode_test_it4/wer_5
@@ -1,62 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# This program takes as its standard input an .ndx file from the WSJ corpus that looks
# like this:
#;; File: tr_s_wv1.ndx, updated 04/26/94
#;;
#;; Index for WSJ0 SI-short Sennheiser training data
#;; Data is read WSJ sentences, Sennheiser mic.
#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts
#;; per speaker TI) = 7236 utts
#;;
#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1
#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1
#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1

# and as command-line arguments it takes the names of the WSJ disk locations, e.g.:
# /mnt/matylda2/data/WSJ0/11-1.1 /mnt/matylda2/data/WSJ0/11-10.1 ... etc.
# It outputs a list of absolute pathnames (it does this by replacing e.g. 11_1_1 with
# /mnt/matylda2/data/WSJ0/11-1.1).
# It also does a slight fix because one of the WSJ disks (WSJ1/13-16.1) was distributed with
# uppercase rather than lower case filenames.

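# (Hypothetical invocation, for illustration only -- this script's filename is
# not shown in this diff excerpt:)
#   perl this_script.pl /mnt/matylda2/data/WSJ0/11-1.1 /mnt/matylda2/data/WSJ0/11-10.1 \
#     < tr_s_wv1.ndx > train.flist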
foreach $fn (@ARGV) {
  $fn =~ m:.+/([0-9\.\-]+)/?$: || die "Bad command-line argument $fn\n";
  $disk_id=$1;
  $disk_id =~ tr/-\./__/; # replace - and . with _ so 11-10.1 becomes 11_10_1
  $fn =~ s:/$::; # Remove final slash, just in case it is present.
  $disk2fn{$disk_id} = $fn;
}

while(<STDIN>){
  if(m/^;/){ next; } # Comment. Ignore it.
  else {
    m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_";
    $disk=$1;
    if(!defined $disk2fn{$disk}) {
      die "Disk id $disk not found";
    }
    $filename = $2; # as a subdirectory of the distributed disk.
    if($disk eq "13_16_1" && `hostname` =~ m/fit.vutbr.cz/) {
      # The disk 13-16.1 has been uppercased for some reason, on the
      # BUT system. This is a fix specifically for that case.
      $filename =~ tr/a-z/A-Z/; # This disk contains all uppercase filenames. Why?
    }
    print "$disk2fn{$disk}/$filename\n";
  }
}
@@ -1,36 +0,0 @@
#!/bin/bash

. cmd.sh

mfccdir=mfcc

# Make "per-utterance" versions of the test sets where the speaker
# information corresponds to utterances -- to demonstrate adaptation on
# short utterances, particularly for basis fMLLR
for x in "test" ; do
  y=${x}_utt
  rm -r data/$y
  cp -r data/$x data/$y
  cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk;
  cp data/$y/utt2spk data/$y/spk2utt;
  steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1;
done
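# (Illustration, with a made-up utterance id: a speaker-level utt2spk entry
# such as "fadg0_si1279 fadg0" becomes "fadg0_si1279 fadg0_si1279" in the
# per-utterance copy, i.e. every utterance is treated as its own speaker.)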

# basis fMLLR experiments.
# First a baseline: decode per-utterance with normal fMLLR.
steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri3b/graph_bg data/test_utt exp/tri3b/decode_bg_test_utt || exit 1;

# get the fMLLR basis.
steps/get_fmllr_basis.sh --cmd "$train_cmd" data/train data/lang exp/tri3b

# decoding tri3b with basis fMLLR
steps/decode_basis_fmllr.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri3b/graph_bg data/test exp/tri3b/decode_bg_test_basis || exit 1;

# The same, per-utterance.
steps/decode_basis_fmllr.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri3b/graph_bg data/test_utt exp/tri3b/decode_bg_test_basis_utt || exit 1;

@@ -1,41 +0,0 @@
# prepare reverse lexicon and language model for backwards decoding
utils/prepare_lang.sh --reverse true data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp.reverse data/lang.reverse || exit 1;
utils/reverse_lm.sh data/local/nist_lm/lm_bg_5k.arpa.gz data/lang.reverse data/lang_test_bg_5k.reverse || exit 1;
utils/reverse_lm_test.sh data/lang_test_bg_5k data/lang_test_bg_5k.reverse || exit 1;

# normal forward decoding
utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg5k
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --nj 8 --cmd "$decode_cmd" \
  exp/tri2a/graph_bg5k data/test_eval92 exp/tri2a/decode_eval92_bg5k_10 || exit 1;

# backward decoding
utils/mkgraph.sh --reverse data/lang_test_bg_5k.reverse exp/tri2a exp/tri2a/graph_bg5k_r
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --reverse true --nj 8 --cmd "$decode_cmd" \
  exp/tri2a/graph_bg5k_r data/test_eval92 exp/tri2a/decode_eval92_bg5k_reverse10 || exit 1;

# pingpong decoding
steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --reverse true --nj 8 --cmd "$decode_cmd" \
  --first_pass exp/tri2a/decode_eval92_bg5k_10 exp/tri2a/graph_bg5k_r data/test_eval92 exp/tri2a/decode_eval92_bg5k_pingpong10 || exit 1;
steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --nj 8 --cmd "$decode_cmd" \
  --first_pass exp/tri2a/decode_eval92_bg5k_reverse10 exp/tri2a/graph_bg5k data/test_eval92 exp/tri2a/decode_eval92_bg5k_pongping10 || exit 1;

# same for bigger language models (on a machine with 8GB RAM, you can run the whole decoding in 3-4 min without SGE)
utils/prepare_lang.sh --reverse true data/local/dict_larger "<SPOKEN_NOISE>" data/local/lang_larger.reverse data/lang_bd.reverse || exit;
utils/reverse_lm.sh --lexicon data/local/dict_larger/lexicon.txt data/local/local_lm/3gram-mincount/lm_pr6.0.gz data/lang_bd.reverse data/lang_test_bd_tgpr.reverse || exit 1;
utils/reverse_lm_test.sh data/lang_test_bd_tgpr data/lang_test_bd_tgpr.reverse || exit 1;

utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri2a exp/tri2a/graph_bd_tgpr
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --nj 4 --cmd run.pl \
  exp/tri2a/graph_bd_tgpr data/test_eval92 exp/tri2a/decode_eval92_bdtgpr4_10 || exit 1;

utils/mkgraph.sh --reverse data/lang_test_bd_tgpr.reverse exp/tri2a exp/tri2a/graph_bd_tgpr_r
steps/decode_fwdbwd.sh --beam 10.0 --latbeam 4.0 --reverse true --nj 4 --cmd run.pl \
  exp/tri2a/graph_bd_tgpr_r data/test_eval92 exp/tri2a/decode_eval92_bdtgpr4_reverse10 || exit 1;

steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --reverse true --nj 4 --cmd run.pl \
  --first_pass exp/tri2a/decode_eval92_bdtgpr4_10 exp/tri2a/graph_bd_tgpr_r data/test_eval92 \
  exp/tri2a/decode_eval92_bdtgpr4_pingpong10 || exit 1;

steps/decode_fwdbwd.sh --beam 10.0 --max-beam 20.0 --nj 4 --cmd run.pl \
  --first_pass exp/tri2a/decode_eval92_bdtgpr4_reverse10 exp/tri2a/graph_bd_tgpr data/test_eval92 \
  exp/tri2a/decode_eval92_bdtgpr4_pongping10 || exit 1;

@@ -1,96 +0,0 @@
#!/bin/bash

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.

. ./path.sh ## Source the tools/utils (import the queue.pl)

###
### Now we can train the Deep Neural Network in a hybrid setup
###
### The fMLLR features are
###  - spliced,
###  - decorrelated by LDA,
###  - rescaled by CMVN over the dataset
###

#( # Train the MLP
dir=exp/tri4a_dnn
$cuda_cmd $dir/_train_nnet.log \
  steps/train_nnet.sh --hid-layers 4 --hid-dim 1200 \
  --apply-cmvn false --splice-lr 4 --feat-type lda --lda-dim 300 \
  --learn-rate 0.008 --bunch-size 256 \
  data-fmllr/train data-fmllr/test_test_sup data/lang exp/tri3b exp/tri3b_ali_test $dir || exit 1;
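# (Reading the options above -- a gloss inferred from the flags, not stated in
# the script: --splice-lr 4 splices 4 frames on each side of the current frame,
# i.e. 9 frames in total, and the LDA then projects the spliced vector down to
# --lda-dim 300 dimensions before the network sees it.)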

# we can use the graph from the baseline system, tri4a.
# decode. Note: the dev93 results are not valid as testing results because
# the fMLLR was from the training transcripts.
steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --acwt 0.10 \
  exp/tri3b/graph_bg data-fmllr/test exp/tri3b_dnn/decode_bg_test

# decode with big dictionary.
utils/mkgraph.sh data/lang_test_bg exp/tri3b_dnn exp/tri3b_dnn/graph_bg || exit 1;

steps/decode_nnet.sh --nj 10 --cmd "$decode_cmd" --acwt 0.10 \
  exp/tri3b_dnn/graph_bg data-fmllr/test exp/tri3b_dnn/decode_bg_test
#)

# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done



#from here


#false && \
(

###
### First we need to generate the alignments,
###
### these are used as DNN training targets,
### also the fMLLR transforms are needed
###

# We don't really need the alignment directory, as tri4a was trained
# on si284 and already contains alignments.
#steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
#  data/train_si284 data/lang exp/tri4a exp/tri4a_ali_si284 || exit 1

steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
  data/test data/lang exp/tri3b exp/tri3b_ali_test || exit 1 #dev

###
### As next step we store the fMLLR features, so we can train on them easily
###

gmmdir=exp/tri3b

# dev93 (using alignments)
dir=data-fmllr/test_test_sup
# generate the features
steps/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
  --transform-dir exp/tri3b_ali_test \
  $dir data/test $gmmdir $dir/_log $dir/_data || exit 1

# train si284
# generate the features
dir=data-fmllr/train
steps/make_fmllr_feats.sh --nj 20 --cmd "$train_cmd" \
  --transform-dir exp/tri3b \
  $dir data/train $gmmdir $dir/_log $dir/_data || exit 1

# eval92
dir=data-fmllr/test
steps/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \
  --transform-dir exp/tri3b/decode_bg_test \
  $dir data/test $gmmdir $dir/_log $dir/_data || exit 1

dir=data-fmllr/test
steps/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
  --transform-dir exp/tri3b/decode_bg_test \
  $dir data/test $gmmdir $dir/_log $dir/_data || exit 1
)
@@ -1,57 +0,0 @@
#!/bin/bash

. ./cmd.sh

# Train and test MMI (and boosted MMI) on the tri2b system.
steps/make_denlats.sh --sub-split 20 --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri2b exp/tri2b_denlats_train || exit 1;

# train the basic MMI system.
steps/train_mmi.sh --cmd "$train_cmd" \
  data/train data/lang exp/tri2b_ali_train \
  exp/tri2b_denlats_train exp/tri2b_mmi || exit 1;

for iter in 1 2 3 4; do
  steps/decode_si.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_mmi/decode_bg_test_it$iter &
done

# MMI with 0.1 boosting factor.
steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \
  data/train data/lang exp/tri2b_ali_train exp/tri2b_denlats_train \
  exp/tri2b_mmi_b0.1 || exit 1;
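# (Background note added here, not in the original script: boosted MMI raises
# the weight of denominator-lattice paths in proportion to how many errors
# they contain, via a factor of the form exp(-boost * accuracy); see Povey
# et al., "Boosted MMI for model and feature-space discriminative training",
# ICASSP 2008.)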

for iter in 1 2 3 4; do
  steps/decode_si.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_mmi_b0.1/decode_bg_test_it$iter &
done

# Train a UBM with 400 components, for fMMI.
steps/train_diag_ubm.sh --silence-weight 0.5 --nj 30 --cmd "$train_cmd" \
  400 data/train data/lang exp/tri2b_ali_train exp/dubm2b

steps/train_mmi_fmmi.sh --boost 0.1 --cmd "$train_cmd" \
  data/train data/lang exp/tri2b_ali_train exp/dubm2b exp/tri2b_denlats_train \
  exp/tri2b_fmmi_b0.1

for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_fmmi_b0.1/decode_bg_test_it$iter &
done

steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
  data/train data/lang exp/tri2b_ali_train exp/dubm2b exp/tri2b_denlats_train \
  exp/tri2b_fmmi_b0.1_lr0.005 || exit 1;

for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_fmmi_b0.1_lr0.005/decode_bg_test_it$iter &
done

steps/train_mmi_fmmi_indirect.sh --boost 0.1 --cmd "$train_cmd" \
  data/train data/lang exp/tri2b_ali_train exp/dubm2b exp/tri2b_denlats_train \
  exp/tri2b_fmmi_indirect_b0.1
for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    exp/tri2b/graph_bg data/test exp/tri2b_fmmi_indirect_b0.1/decode_bg_test_it$iter &
done
@@ -1,41 +0,0 @@
#!/bin/bash
. ./cmd.sh
[ -f path.sh ] && . ./path.sh

steps/make_denlats.sh --nj 30 --sub-split 30 --cmd "$train_cmd" \
  --transform-dir exp/tri3b_ali_train \
  data/train data/lang exp/tri3b exp/tri3b_denlats_train || exit 1;

steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \
  data/train data/lang exp/tri3b_ali_train exp/tri3b_denlats_train \
  exp/tri3b_mmi_b0.1 || exit 1;

steps/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode_bg_test \
  exp/tri3b/graph_tgpr data/test exp/tri3b_mmi_b0.1/decode_bg_test

# first, train a UBM for the fMMI experiments.
steps/train_diag_ubm.sh --silence-weight 0.5 --nj 30 --cmd "$train_cmd" \
  600 data/train data/lang exp/tri3b_ali_train exp/dubm3b

# Next, fMMI+MMI.
steps/train_mmi_fmmi.sh \
  --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri3b_ali_train exp/dubm3b exp/tri3b_denlats_train exp/tri3b_fmmi_a || exit 1;

for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3b/decode_bg_test exp/tri3b/graph_bg data/test \
    exp/tri3b_fmmi_a/decode_bg_test_it$iter
done

# fMMI + MMI with the indirect differential.
steps/train_mmi_fmmi_indirect.sh --boost 0.1 --cmd "$train_cmd" \
  data/train data/lang exp/tri3b_ali_train exp/dubm3b exp/tri3b_denlats_train \
  exp/tri3b_fmmi_indirect || exit 1;

for iter in 1 2 3 4; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3b/decode_bg_test exp/tri3b/graph_bg data/test \
    exp/tri3b_fmmi_indirect/decode_bg_test_it$iter
done

@@ -1,35 +0,0 @@
#!/bin/bash

. ./cmd.sh

( # I'm using basically the same setup as for Switchboard 100 hours,
  # but slightly fewer parameters (8M -> 7M) as we have slightly less
  # data (81 hours).
  steps/train_nnet_cpu.sh \
    --mix-up 8000 \
    --initial-learning-rate 0.01 --final-learning-rate 0.001 \
    --num-jobs-nnet 16 --num-hidden-layers 4 \
    --num-parameters 7000000 \
    --cmd "$decode_cmd" \
    data/train_si284 data/lang exp/tri4b_ali_si284 exp/nnet5c1 || exit 1

  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 10 \
    --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
    exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c1/decode_bd_tgpr_dev93

  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
    --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
    exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c1/decode_bd_tgpr_dev93
)

(
  steps/train_nnet_cpu_mmi.sh --boost 0.1 --initial-learning-rate 0.001 \
    --minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
    data/train data/lang exp/tri5c1_nnet exp/tri5c1_nnet exp/tri5c1_denlats exp/tri5c1_mmi_a

  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
    --transform-dir exp/tri3b/decode \
    exp/tri3b/graph data/test exp/tri5c1_mmi_a/decode
)&

@@ -1,42 +0,0 @@
#!/bin/bash

for test in dev93 eval92; do

  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_bd_tgpr data/lang_test_bd_fg \
    data/test_${test} exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 || exit 1;

  # Note: for N-best-list generation, choosing the acoustic scale (12) that gave
  # the best WER on this test set. Ideally we should do this on a dev set.

  # This step interpolates a small RNNLM (with weight 0.25) with the 4-gram LM.
  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.25 data/lang_test_bd_fg data/local/rnnlm.h30.voc10k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm30_0.25 \
    || exit 1;
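  # (Gloss added for clarity, assuming the usual convention of this script:
  # the leading numeric argument is the interpolation weight lambda, so the
  # rescoring scores each N-best hypothesis with
  #   P(w|h) = lambda * P_RNNLM(w|h) + (1 - lambda) * P_4gram(w|h),
  # here with lambda = 0.25.)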

  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.5 data/lang_test_bd_fg data/local/rnnlm.h100.voc20k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm100_0.5 \
    || exit 1;

  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.5 data/lang_test_bd_fg data/local/rnnlm.h200.voc30k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm200_0.5 \
    || exit 1;

  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm300_0.5 \
    || exit 1;

  steps/rnnlmrescore.sh \
    --N 100 --cmd "$decode_cmd" --inv-acwt 12 \
    0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_${test} \
    exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4 exp/sgmm5b_mmi_b0.1/decode_bd_fg_${test}_it4_rnnlm300_0.75 \
    || exit 1;
done
@@ -1,64 +0,0 @@
#!/bin/bash

. cmd.sh

# This step interpolates a small RNNLM (with weight 0.25) with the 4-gram LM.
steps/rnnlmrescore.sh \
  --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.25 data/lang_test_bd_fg data/local/rnnlm.h30.voc10k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm30_0.25 \
  || exit 1;

steps/rnnlmrescore.sh \
  --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h100.voc20k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm100_0.5 \
  || exit 1;

steps/rnnlmrescore.sh \
  --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h200.voc30k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm200_0.5 \
  || exit 1;

steps/rnnlmrescore.sh \
  --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 \
  || exit 1;

steps/rnnlmrescore.sh \
  --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N1000

dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.75_N1000
rm -rf $dir
cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N1000 $dir
steps/rnnlmrescore.sh \
  --stage 7 --N 1000 --cmd "$decode_cmd" --inv-acwt 17 \
  0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg $dir

dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.75
rm -rf $dir
cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 $dir
steps/rnnlmrescore.sh \
  --stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.75 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg $dir

dir=exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.25
rm -rf $dir
cp -r exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5 $dir
steps/rnnlmrescore.sh \
  --stage 7 --N 100 --cmd "$decode_cmd" --inv-acwt 17 \
  0.25 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg $dir

steps/rnnlmrescore.sh \
  --N 10 --cmd "$decode_cmd" --inv-acwt 17 \
  0.5 data/lang_test_bd_fg data/local/rnnlm.h300.voc40k data/test_eval92 \
  exp/tri3b/decode_bd_tgpr_eval92_fg exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm300_0.5_N10 \
  || exit 1;
@@ -1,113 +0,0 @@
#!/bin/bash

# This script is invoked from ../run.sh
# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.

. cmd.sh

# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for
# training, but this shouldn't have much effect.

(
  steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
    data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1;

  steps/train_ubm.sh --cmd "$train_cmd" \
    400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1;

  steps/train_sgmm.sh --cmd "$train_cmd" \
    3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \
    exp/ubm5b/final.ubm exp/sgmm5a || exit 1;

  (
    utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr
    steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
      exp/sgmm5a/graph_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93
  ) &

  steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \
    --use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm5a exp/sgmm5a_ali_si84 || exit 1;
  steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \
    data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84

  steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
    data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1

  for iter in 1 2 3 4; do
    steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
      --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \
      exp/sgmm5a_mmi_b0.1/decode_tgpr_dev93_it$iter &
  done

  steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
    --update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1_m0.9

  for iter in 1 2 3 4; do
    steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
      --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \
      exp/sgmm5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter &
  done

) &


(
  # The next commands are the same thing on all the si284 data.

  # SGMM system on the si284 data [sgmm5b]
  steps/train_ubm.sh --cmd "$train_cmd" \
    600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1;

  steps/train_sgmm.sh --cmd "$train_cmd" \
    5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
    exp/ubm5b/final.ubm exp/sgmm5b || exit 1;

  (
    utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5b exp/sgmm5b/graph_tgpr
    steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
      exp/sgmm5b/graph_tgpr data/test_dev93 exp/sgmm5b/decode_tgpr_dev93
    steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
      exp/sgmm5b/graph_tgpr data/test_eval92 exp/sgmm5b/decode_tgpr_eval92

    utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm5b exp/sgmm5b/graph_bd_tgpr || exit 1;
    steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
      exp/sgmm5b/graph_bd_tgpr data/test_dev93 exp/sgmm5b/decode_bd_tgpr_dev93
    steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
      exp/sgmm5b/graph_bd_tgpr data/test_eval92 exp/sgmm5b/decode_bd_tgpr_eval92
  ) &

  steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \
    --use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm5b exp/sgmm5b_ali_si284

  steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
    data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284

  steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \
    data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 exp/sgmm5b_mmi_b0.1

  for iter in 1 2 3 4; do
    for test in dev93 eval92; do
      steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
        --transform-dir exp/tri4b/decode_tgpr_${test} data/lang_test_tgpr data/test_${test} exp/sgmm5b/decode_tgpr_${test} \
        exp/sgmm5b_mmi_b0.1/decode_tgpr_${test}_it$iter &

      steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
        --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_tgpr data/test_${test} exp/sgmm5b/decode_bd_tgpr_${test} \
        exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter &
    done
  done
) &


# Train quinphone SGMM system.

steps/train_sgmm.sh --cmd "$train_cmd" \
  --context-opts "--context-width=5 --central-position=2" \
  5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
  exp/ubm5b/final.ubm exp/sgmm5c || exit 1;
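# (For comparison -- an assumption based on standard Kaldi defaults rather
# than anything shown here: an ordinary triphone system corresponds to
# --context-width=3 --central-position=1, so the options above widen the
# phonetic context window from 3 phones to 5.)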

# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93.
steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
  data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93

@@ -1,74 +0,0 @@
#!/bin/bash

# This script is invoked from ../run.sh
# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.
[ -f path.sh ] && . ./path.sh


# Note: you might want to try to give the option --spk-dep-weights=false to train_sgmm2.sh;
# this takes out the "symmetric SGMM" part which is not always helpful.

# SGMM system on train data [sgmm4a]. Note: the system we aligned from used the train data
# for training, but this shouldn't have much effect.

steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri3b exp/tri3b_ali_train || exit 1;

steps/train_ubm.sh --cmd "$train_cmd" \
  400 data/train data/lang exp/tri3b_ali_train exp/ubm4a || exit 1;

steps/train_sgmm2.sh --cmd "$train_cmd" \
  7000 9000 data/train data/lang exp/tri3b_ali_train \
  exp/ubm4a/final.ubm exp/sgmm2_4a || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/sgmm2_4a exp/sgmm2_4a/graph_bg
steps/decode_sgmm2.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode_bg_test \
  exp/sgmm2_4a/graph_bg data/test exp/sgmm2_4a/decode_bg_test

steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri3b_ali_train \
  --use-graphs true --use-gselect true data/train data/lang exp/sgmm2_4a exp/sgmm2_4a_ali_train || exit 1;
steps/make_denlats_sgmm2.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali_train \
  data/train data/lang exp/sgmm2_4a_ali_train exp/sgmm2_4a_denlats_train

steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri3b_ali_train --boost 0.1 \
  data/train data/lang exp/sgmm2_4a_ali_train exp/sgmm2_4a_denlats_train exp/sgmm2_4a_mmi_b0.1

for iter in 1 2 3 4; do
  for test in "test"; do # dev93
    steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
      --transform-dir exp/tri3b/decode_bg_${test} data/lang_test_bg data/${test} exp/sgmm2_4a/decode_bg_${test} exp/sgmm2_4a_mmi_b0.1/decode_bg_${test}_it$iter
  done
done

# steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri3b_ali_train --boost 0.1 \
#   --update-opts "--cov-min-value=0.9" data/train data/lang exp/sgmm2_4a_ali_train exp/sgmm2_4a_denlats_train exp/sgmm2_4a_mmi_b0.1_m0.9

steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri3b_ali_train --boost 0.1 \
  --zero-if-disjoint true data/train data/lang exp/sgmm2_4a_ali_train exp/sgmm2_4a_denlats_train exp/sgmm2_4a_mmi_b0.1_z

for iter in 1 2 3 4; do
  for test in "test"; do # dev93
    steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
      --transform-dir exp/tri3b/decode_bg_${test} data/lang_test_bg data/${test} exp/sgmm2_4a/decode_bg_${test} \
      exp/sgmm2_4a_mmi_b0.1_z/decode_bg_${test}_it$iter
  done
done

# Examples of combining some of the best decodings: SGMM+MMI with
# MMI+fMMI on a conventional system.

local/score_combine.sh data/test \
  data/lang_test_bg \
  exp/tri3b_fmmi_a/decode_bg_test_it1 \
  exp/sgmm2_4a_mmi_b0.1/decode_bg_test_it1 \
  exp/combine_tri3b_fmmi_a_sgmm2_4a_mmi_b0.1/decode_bg_test_it1_1


# Checking MBR decode of baseline:
cp -r -T exp/sgmm2_4a_mmi_b0.1/decode_bg_test_it3{,.mbr}
local/score_mbr.sh data/test data/lang_test_bg exp/sgmm2_4a_mmi_b0.1/decode_bg_test_it3.mbr
@@ -1,61 +0,0 @@
#!/bin/bash

# Script for minimum Bayes risk decoding.

[ -f ./path.sh ] && . ./path.sh;

# begin configuration section.
cmd=run.pl
min_lmwt=1
max_lmwt=10
# end configuration section.

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: local/score_mbr.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
  exit 1;
fi

data=$1
lang_or_graph=$2
dir=$3

symtab=$lang_or_graph/words.txt

for f in $symtab $dir/lat.1.gz $data/text; do
  [ ! -f $f ] && echo "score_mbr.sh: no such file $f" && exit 1;
done

mkdir -p $dir/scoring/log

phonemap="conf/phones.60-48-39.map"

cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt

# We submit the jobs separately, not as an array, because it's hard
# to get the inverse of the LM scales.
rm $dir/.error 2>/dev/null
for inv_acwt in `seq $min_lmwt $max_lmwt`; do
  acwt=`perl -e "print (1.0/$inv_acwt);"`
  $cmd $dir/scoring/rescore_mbr.${inv_acwt}.log \
    lattice-mbr-decode --acoustic-scale=$acwt --word-symbol-table=$symtab \
    "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/${inv_acwt}.tra \
    || touch $dir/.error &
done
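# (Worked example of the acoustic-scale arithmetic above, added as a gloss:
# with min_lmwt=1 and max_lmwt=10, the loop decodes at acwt = 1.0, 0.5,
# 0.333..., down to 0.1 -- i.e. acwt is simply 1/inv_acwt, so an LM weight
# of 10 corresponds to an acoustic scale of 0.1.)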
wait;
[ -f $dir/.error ] && echo "score_mbr.sh: error getting MBR output.";

$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
  cat $dir/scoring/LMWT.tra \| \
  utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
  local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 \| \
  compute-wer --text --mode=present \
  ark:$dir/scoring/test_filt.txt ark,p:- ">" $dir/wer_LMWT || exit 1;
@@ -1,97 +1,109 @@
#!/bin/bash

# Copyright 2013 (Author: Bagher BabaAli)
# Copyright 2013 (Authors: Bagher BabaAli, Daniel Povey, Arnab Ghoshal)
# Apache 2.0.

if [ $# -ne 1 ]; then
  echo "Argument should be the Timit directory, see ../run.sh for example."
  exit 1;
fi

dir=`pwd`/data/local/data
mkdir -p $dir
lmdir=`pwd`/data/local/nist_lm
mkdir -p $dir $lmdir
local=`pwd`/local
utils=`pwd`/utils
conf=`pwd`/conf

. ./path.sh # Needed for KALDI_ROOT

export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then
  echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
  exit 1;
fi

cd $dir
[ -f $conf/test_spk.list ] || error_exit "$PROG: Eval-set speaker list not found.";
[ -f $conf/dev_spk.list ] || error_exit "$PROG: dev-set speaker list not found.";

# Make directory of links to the TIMIT disk. This relies on the command
# line arguments being absolute pathnames.
rm -r links/ 2>/dev/null
mkdir links/

ln -s $* links

# Do some basic checks that we have what we expected.
if [ ! -d $*/TRAIN -o ! -d $*/TEST ]; then
# First check if the train & test directories exist (these can either be upper-
# or lower-cased)
if [ ! -d $*/TRAIN -o ! -d $*/TEST ] && [ ! -d $*/train -o ! -d $*/test ]; then
  echo "timit_data_prep.sh: Spot check of command line argument failed"
  echo "Command line argument must be absolute pathname to TIMIT directory"
  echo "with name like /export/corpora5/LDC/LDC93S1/timit/TIMIT"
  exit 1;
fi

# This version for TRAIN
# Now check what case the directory structure is
uppercased=false
train_dir=train
test_dir=test
if [ -d $*/TRAIN ]; then
  [ -d $*/train -o -d $*/test ] \
    && echo "Error: Found both upper- & lower-cased directories" && exit 1;
  uppercased=true
  train_dir=TRAIN
  test_dir=TEST
fi

TrainDir=$*/TRAIN
find -L $TrainDir \( -iname '*.WAV' -o -iname '*.wav' \) > train.flist
nl=`cat train.flist | wc -l`
[ "$nl" -eq 4620 ] || echo "Warning: expected 4620 lines in train.flist, got $nl"
tmpdir=$(mktemp -d);
trap 'rm -rf "$tmpdir"' EXIT

# Now for the TEST.
# Get the list of speakers. The list of speakers in the 24-speaker core test
# set and the 50-speaker development set must be supplied to the script. All
# speakers in the 'train' directory are used for training.
if $uppercased; then
  tr '[:lower:]' '[:upper:]' < $conf/dev_spk.list > $tmpdir/dev_spk
  tr '[:lower:]' '[:upper:]' < $conf/test_spk.list > $tmpdir/test_spk
  ls -d "$*"/TRAIN/DR*/* | sed -e "s:^.*/::" > $tmpdir/train_spk
else
  tr '[:upper:]' '[:lower:]' < $conf/dev_spk.list > $tmpdir/dev_spk
  tr '[:upper:]' '[:lower:]' < $conf/test_spk.list > $tmpdir/test_spk
  ls -d "$*"/train/dr*/* | sed -e "s:^.*/::" > $tmpdir/train_spk
fi

TestDir=$*/TEST
find -L $TestDir \( -iname '*.WAV' -o -iname '*.wav' \) > test.flist
cd $dir
for x in train dev test; do
  # First, find the list of audio files (use only si & sx utterances).
  # Note: train & test sets are under different directories, but doing find on
  # both and grepping for the speakers will work correctly.

  nl=`cat test.flist | wc -l`
  [ "$nl" -eq 1680 ] || echo "Warning: expected 1680 lines in test.flist, got $nl"
  find $*/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.WAV' \
    | grep -f $tmpdir/${x}_spk > ${x}_sph.flist

  sed -e 's:.*/\(.*\)/\(.*\).WAV$:\1_\2:i' ${x}_sph.flist \
    > $tmpdir/${x}_sph.uttids
  paste $tmpdir/${x}_sph.uttids ${x}_sph.flist \
    | sort -k1,1 > ${x}_sph.scp

  # Finding the transcript files:
  find -L $TrainDir \( -iname '*.PHN' -o -iname '*.phn' \) > train_phn.flist
  find -L $TestDir \( -iname '*.PHN' -o -iname '*.phn' \) > test_phn.flist
  cat ${x}_sph.scp | awk '{print $1}' > ${x}.uttids

# Convert the transcripts into our format (no normalization yet)
for x in train test; do
  $local/timit_flist2scp.pl $x.flist | sort > ${x}_sph.scp
  cat ${x}_sph.scp | awk '{print $1}' > ${x}.uttids
  cat ${x}.uttids | $local/timit_find_transcripts.pl ${x}_phn.flist > ${x}_phn.trans
done
  # Now, convert the transcripts into our format (no normalization yet).
  # Get the transcripts: each line of the output contains an utterance
  # ID followed by the transcript.
  find $*/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.PHN' \
    | grep -f $tmpdir/${x}_spk > $tmpdir/${x}_phn.flist
  sed -e 's:.*/\(.*\)/\(.*\).PHN$:\1_\2:i' $tmpdir/${x}_phn.flist \
    > $tmpdir/${x}_phn.uttids
  while read line; do
    [ -f $line ] || error_exit "Cannot find transcription file '$line'";
    cut -f3 -d' ' "$line" | tr '\n' ' ' | sed -e 's: *$:\n:'
  done < $tmpdir/${x}_phn.flist > $tmpdir/${x}_phn.trans
  paste $tmpdir/${x}_phn.uttids $tmpdir/${x}_phn.trans \
    | sort -k1,1 > ${x}.trans

# Do normalization steps.
cat train_phn.trans | $local/timit_norm_trans.pl -i - -m $conf/phones.60-48-39.map -to 48 | sort > train.txt || exit 1;
  cat ${x}.trans | $local/timit_norm_trans.pl -i - -m $conf/phones.60-48-39.map -to 39 | sort > $x.txt || exit 1;

for x in test; do
  cat ${x}_phn.trans | $local/timit_norm_trans.pl -i - -m $conf/phones.60-48-39.map -to 39 | sort > $x.txt || exit 1;
done

# Create scp's with wav's.
for x in train test; do
  awk '{printf("%s '$sph2pipe' -f wav %s |\n", $1, $2);}' < ${x}_sph.scp > ${x}_wav.scp
done

# Make the utt2spk and spk2utt files.
for x in train test; do
  cut -f1 -d'_' $x.uttids | paste -d' ' $x.uttids - > $x.utt2spk
  cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;

  cut -f1 -d'_' $x.uttids | paste -d' ' $x.uttids - > $x.utt2spk
  cat $x.utt2spk | $utils/utt2spk_to_spk2utt.pl > $x.spk2utt || exit 1;

  cat $x.spk2utt | awk '{print $1}' | perl -ane 'chop; m:^.:; print "$_ $&\n";' > $x.spk2gender
done

# Make the spk2gender files.
for x in train test; do
  cat $x.spk2utt | awk '{print $1}' | perl -ane 'chop; m:^.:; print "$_ $&\n";' > $x.spk2gender
done
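# (Illustration of the spk2gender line above, relying on TIMIT's convention
# that a speaker id starts with the gender letter -- the id itself is just an
# example: for speaker "fadg0" the perl one-liner emits "fadg0 f".)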


echo "Data preparation succeeded"
@@ -1,60 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# This program takes on its standard input a list of utterance
# id's, one for each line (e.g. 4k0c030a is an utterance id).
# It extracts from the phn files the transcripts for a given
# dataset (represented by a file list).
#

@ARGV == 1 || die "timit_find_transcripts.pl phn_trans_flist < utterance_ids > transcripts";
$phn_flist = shift @ARGV;

open(L, "<$phn_flist") || die "Opening file list of phn files: $phn_flist\n";
while(<L>){
  chop;
  m:^\S+/(\w+)/(\w+)\.[pP][hH][nN]$: || die "Bad line in phn file list: $_";
  $spk = $1 . "_" . $2;
  $spk2phn{$spk} = $_;
}

%utt2trans = ( );
while(<STDIN>){
  chop;
  $uttid = $_;
  $uttid =~ m:(\w+)_(\w+): || die "Bad utterance id $_";
  $phnfile = $spk2phn{$uttid};
  defined $phnfile || die "No phn file for utterance $uttid\n";
  open(F, "<$phnfile") || die "Error opening phn file $phnfile\n";
  @trans = ();
  while(<F>) {
    $_ =~ m:\d+\s\d+\s(.+)$: || die "Bad line $_ in phn file $phnfile (line $.)\n";
    push (@trans, $1);
  }
  $utt2trans{$uttid} = join(" ", @trans);

  if(!defined $utt2trans{$uttid}) {
    print STDERR "No transcript for utterance $uttid (current phn file is $phnfile)\n";
  } else {
    print "$uttid $utt2trans{$uttid}\n";
  }
  close(F);
}
@@ -1,30 +0,0 @@
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# takes in a file list with lines like
# /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# and outputs an scp in kaldi format with lines like
# 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# (the first thing is the utterance-id, which is the same as the basename of the file.)

while(<>){
  m:^\S+/(\w+)/(\w+)\.[wW][aA][vV]$: || die "Bad line $_";
  $id = $1 . "_" . $2;
  print "$id $_";
}
@@ -1,6 +1,6 @@
#!/bin/bash

# Copyright 2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
# Copyright 2013 (Author: Daniel Povey)
# Apache 2.0

# This script takes data prepared in a corpus-dependent way
@@ -9,14 +9,14 @@

. ./path.sh || exit 1;

echo "Preparing train and test data"
echo "Preparing train, dev and test data"
srcdir=data/local/data
lmdir=data/local/nist_lm
tmpdir=data/local/lm_tmp
lexicon=data/local/dict/lexicon.txt
mkdir -p $tmpdir

for x in train test; do
for x in train dev test; do
  mkdir -p data/$x
  cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;
  cp $srcdir/$x.txt data/$x/text || exit 1;
@@ -25,7 +25,6 @@ for x in train test; do
  utils/filter_scp.pl data/$x/spk2utt $srcdir/$x.spk2gender > data/$x/spk2gender || exit 1;
done


# Next, for each type of language model, create the corresponding FST
# and the corresponding lang_test_* directory.

@@ -53,10 +52,10 @@ for lm_suffix in bg; do
  # Because of the <s> fiasco for these particular LMs, the first number is not
  # as close to zero as it could be.

  # Everything below is only for diagnostics.
  # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
  # this might cause determinization failure of CLG.
  # #0 is treated as an empty word.
  mkdir -p $tmpdir/g
  awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
    < "$lexicon" >$tmpdir/g/select_empty.fst.txt
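  # (A sketch of how this diagnostic typically continues in Kaldi format_data
  # scripts -- an assumption, since the rest of the hunk is not shown here:
  # compile the acceptor above, compose it with G, and fail if the result is
  # cyclic.)
  # fstcompile --isymbols=data/lang_test_${lm_suffix}/words.txt \
  #   --osymbols=data/lang_test_${lm_suffix}/words.txt \
  #   $tmpdir/g/select_empty.fst.txt | \
  #   fstarcsort --sort_type=olabel | \
  #   fstcompose - data/lang_test_${lm_suffix}/G.fst > $tmpdir/g/empty_words.fst
  # fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' && \
  #   echo "Language model has cycles with empty words" && exit 1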
@@ -1,6 +1,6 @@
#!/bin/bash

# Copyright 2013 (Author: Bagher BabaAli)
# Copyright 2013 (Authors: Daniel Povey, Bagher BabaAli)

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
|
|||
$p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \
|
||||
>> $dir/extra_questions.txt || exit 1;
|
||||
|
||||
|
||||
# (2) Create the phone bigram LM
|
||||
#(
|
||||
[ -z "$IRSTLM" ] && \
|
||||
echo "LM building won't work without setting the IRSTLM env variable" && exit 1;
|
||||
! which build-lm.sh 2>/dev/null && \
|
||||
|
@ -76,8 +74,4 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_))
|
|||
compile-lm $tmpdir/lm_phone_bg.ilm.gz --text yes /dev/stdout | \
|
||||
grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz
|
||||
|
||||
#) >& data/prepare_lm.log
|
||||
|
||||
|
||||
echo "Dictionary preparation succeeded"
|
||||
|
||||
echo "Dictionary & language model preparation succeeded"
|
|
@@ -5,13 +5,27 @@
. ./cmd.sh
[ -f path.sh ] && . ./path.sh

# Acoustic model parameters
numLeavesTri1=2500
numGaussTri1=15000
numLeavesMLLT=2500
numGaussMLLT=15000
numLeavesSAT=2500
numGaussSAT=15000
numGaussUBM=400
numLeavesSGMM=7000
numGaussSGMM=9000

decode_nj=20
train_nj=30

echo ============================================================================
echo " Data & Lexicon & Language Preparation "
echo ============================================================================

timit=/export/corpora5/LDC/LDC93S1/timit/TIMIT

local/timit_data_prep.sh $timit || exit 1;
local/timit_data_prep.sh $timit || exit 1;

local/timit_prepare_dict.sh || exit 1;

@@ -21,119 +35,178 @@ utils/prepare_lang.sh --position-dependent-phones false --num-sil-states 3 \
local/timit_format_data.sh || exit 1;

echo ============================================================================
echo " MFCC Feature Extraction & CMVN for Training and Test set "
echo " MFCC Feature Extraction & CMVN for Training and Test set "
echo ============================================================================

# Now make MFCC features.
mfccdir=mfcc
for x in test train; do
  steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 \
    data/$x exp/make_mfcc/$x $mfccdir || exit 1;
  steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
use_pitch=false
use_ffv=false

for x in train dev test; do
  steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 data/$x exp/make_mfcc/$x $mfccdir || exit 1;
  steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
done

vecho ============================================================================
echo ============================================================================
echo " MonoPhone Training & Decoding "
echo ============================================================================

steps/train_mono.sh --nj 30 --cmd "$train_cmd" data/train data/lang exp/mono || exit 1;
steps/train_mono.sh --nj "$train_nj" --cmd "$train_cmd" data/train data/lang exp/mono || exit 1;

utils/mkgraph.sh --mono data/lang_test_bg exp/mono exp/mono/graph_bg || exit 1;
utils/mkgraph.sh --mono data/lang_test_bg exp/mono exp/mono/graph || exit 1;

steps/decode.sh --nj 30 --cmd "$decode_cmd" \
  exp/mono/graph_bg data/test exp/mono/decode_bg_test || exit 1;
steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/mono/graph data/dev exp/mono/decode_dev || exit 1;

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/mono/graph data/test exp/mono/decode_test || exit 1;

echo ============================================================================
echo " tri1 : Deltas + Delta-Deltas Training & Decoding "
echo ============================================================================

steps/align_si.sh --boost-silence 1.25 --nj 30 --cmd "$train_cmd" \
steps/align_si.sh --boost-silence 1.25 --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/mono exp/mono_ali || exit 1;

# Train tri1, which is deltas + delta-deltas, on train data.
steps/train_deltas.sh --cmd "$train_cmd" \
  2500 15000 data/train data/lang exp/mono_ali exp/tri1 || exit 1;
  $numLeavesTri1 $numGaussTri1 data/train data/lang exp/mono_ali exp/tri1 || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/tri1 exp/tri1/graph_bg || exit 1;
utils/mkgraph.sh data/lang_test_bg exp/tri1 exp/tri1/graph || exit 1;

steps/decode.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri1/graph_bg data/test exp/tri1/decode_bg_test || exit 1;
steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri1/graph data/dev exp/tri1/decode_dev || exit 1;

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri1/graph data/test exp/tri1/decode_test || exit 1;

echo ============================================================================
echo " tri2 : LDA + MLLT Training & Decoding "
echo ============================================================================

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri1 exp/tri1_ali_train || exit 1;
steps/align_si.sh --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/tri1 exp/tri1_ali || exit 1;

steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" \
  2500 15000 data/train data/lang exp/tri1_ali_train exp/tri2 || exit 1;
  $numLeavesMLLT $numGaussMLLT data/train data/lang exp/tri1_ali exp/tri2 || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/tri2 exp/tri2/graph_bg || exit 1;
utils/mkgraph.sh data/lang_test_bg exp/tri2 exp/tri2/graph || exit 1;

steps/decode.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri2/graph_bg data/test exp/tri2/decode_bg_test || exit 1;
steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1;

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri2/graph data/test exp/tri2/decode_test || exit 1;

echo ============================================================================
echo " tri3 : LDA + MLLT + SAT Training & Decoding "
echo ============================================================================

# Align tri2 system with train data.
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  --use-graphs true data/train data/lang exp/tri2 exp/tri2_ali_train || exit 1;
steps/align_si.sh --nj "$train_nj" --cmd "$train_cmd" \
  --use-graphs true data/train data/lang exp/tri2 exp/tri2_ali || exit 1;

# From tri2 system, train tri3 which is LDA + MLLT + SAT.
steps/train_sat.sh --cmd "$train_cmd" \
  2500 15000 data/train data/lang exp/tri2_ali_train exp/tri3 || exit 1;
  $numLeavesSAT $numGaussSAT data/train data/lang exp/tri2_ali exp/tri3 || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/tri3 exp/tri3/graph_bg || exit 1;
utils/mkgraph.sh data/lang_test_bg exp/tri3 exp/tri3/graph || exit 1;

steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" \
  exp/tri3/graph_bg data/test exp/tri3/decode_bg_test || exit 1;
steps/decode_fmllr.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri3/graph data/dev exp/tri3/decode_dev || exit 1;

steps/decode_fmllr.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri3/graph data/test exp/tri3/decode_test || exit 1;

echo ============================================================================
echo " SGMM2 Training & Decoding "
echo ============================================================================

steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri3 exp/tri3_ali_train || exit 1;
steps/align_fmllr.sh --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/tri3 exp/tri3_ali || exit 1;

steps/train_ubm.sh --cmd "$train_cmd" \
  400 data/train data/lang exp/tri3_ali_train exp/ubm4 || exit 1;
  $numGaussUBM data/train data/lang exp/tri3_ali exp/ubm4 || exit 1;

steps/train_sgmm2.sh --cmd "$train_cmd" 7000 9000 \
  data/train data/lang exp/tri3_ali_train exp/ubm4/final.ubm exp/sgmm2_4 || exit 1;
steps/train_sgmm2.sh --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM \
  data/train data/lang exp/tri3_ali exp/ubm4/final.ubm exp/sgmm2_4 || exit 1;

utils/mkgraph.sh data/lang_test_bg exp/sgmm2_4 exp/sgmm2_4/graph_bg || exit 1;
utils/mkgraph.sh data/lang_test_bg exp/sgmm2_4 exp/sgmm2_4/graph || exit 1;

steps/decode_sgmm2.sh --nj 30 --cmd "$decode_cmd"\
  --transform-dir exp/tri3/decode_bg_test exp/sgmm2_4/graph_bg data/test \
  exp/sgmm2_4/decode_bg_test || exit 1;
steps/decode_sgmm2.sh --nj "$decode_nj" --cmd "$decode_cmd"\
  --transform-dir exp/tri3/decode_dev exp/sgmm2_4/graph data/dev \
  exp/sgmm2_4/decode_dev || exit 1;

steps/decode_sgmm2.sh --nj "$decode_nj" --cmd "$decode_cmd"\
  --transform-dir exp/tri3/decode_test exp/sgmm2_4/graph data/test \
  exp/sgmm2_4/decode_test || exit 1;

echo ============================================================================
echo " MMI + SGMM2 Training & Decoding "
echo ============================================================================

steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" \
  --transform-dir exp/tri3_ali_train --use-graphs true --use-gselect true data/train \
  data/lang exp/sgmm2_4 exp/sgmm2_4_ali_train || exit 1;
steps/align_sgmm2.sh --nj "$train_nj" --cmd "$train_cmd" \
  --transform-dir exp/tri3_ali --use-graphs true --use-gselect true data/train \
  data/lang exp/sgmm2_4 exp/sgmm2_4_ali || exit 1;

steps/make_denlats_sgmm2.sh --nj 30 --sub-split 30 --cmd "$decode_cmd"\
  --transform-dir exp/tri3_ali_train data/train data/lang exp/sgmm2_4_ali_train \
  exp/sgmm2_4_denlats_train || exit 1;
steps/make_denlats_sgmm2.sh --nj "$train_nj" --sub-split "$train_nj" --cmd "$decode_cmd"\
  --transform-dir exp/tri3_ali data/train data/lang exp/sgmm2_4_ali \
  exp/sgmm2_4_denlats || exit 1;

steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" \
  --transform-dir exp/tri3_ali_train --boost 0.1 --zero-if-disjoint true \
  data/train data/lang exp/sgmm2_4_ali_train exp/sgmm2_4_denlats_train \
  exp/sgmm2_4_mmi_b0.1_z || exit 1;
  --transform-dir exp/tri3_ali --boost 0.1 --zero-if-disjoint true \
  data/train data/lang exp/sgmm2_4_ali exp/sgmm2_4_denlats \
  exp/sgmm2_4_mmi_b0.1 || exit 1;

for iter in 1 2 3 4; do
  steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3/decode_bg_test data/lang_test_bg data/test \
    exp/sgmm2_4/decode_bg_test exp/sgmm2_4_mmi_b0.1_z/decode_bg_test_it$iter || exit 1;
    --transform-dir exp/tri3/decode_dev data/lang_test_bg data/dev \
    exp/sgmm2_4/decode_dev exp/sgmm2_4_mmi_b0.1/decode_dev_it$iter || exit 1;

  steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3/decode_test data/lang_test_bg data/test \
    exp/sgmm2_4/decode_test exp/sgmm2_4_mmi_b0.1/decode_test_it$iter || exit 1;
done

echo ============================================================================
echo " DNN Hybrid Training & Decoding "
echo ============================================================================

# DNN hybrid system training parameters
dnn_mem_reqs="mem_free=1.0G,ram_free=0.2G"
dnn_extra_opts="--num_epochs 20 --num-epochs-extra 10 --add-layers-period 1 --shrink-interval 3"

steps/train_nnet_cpu.sh --mix-up 5000 --initial-learning-rate 0.015 \
  --final-learning-rate 0.002 --num-hidden-layers 2 --num-parameters 1500000 \
  --num-jobs-nnet "$train_nj" --cmd "$train_cmd" "${dnn_train_extra_opts[@]}" \
  data/train data/lang exp/tri3_ali exp/tri4_nnet || exit 1;

decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=0.7G")
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj "$decode_nj" "${decode_extra_opts[@]}" \
  --transform-dir exp/tri3/decode_dev exp/tri3/graph data/dev \
  exp/tri4_nnet/decode_dev | tee exp/tri4_nnet/decode_dev/decode.log

steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj "$decode_nj" "${decode_extra_opts[@]}" \
  --transform-dir exp/tri3/decode_test exp/tri3/graph data/test \
  exp/tri4_nnet/decode_test | tee exp/tri4_nnet/decode_test/decode.log

echo ============================================================================
echo " System Combination (DNN+SGMM) "
echo ============================================================================

for iter in 1 2 3 4; do
  local/score_combine.sh --cmd "$decode_cmd" \
    data/dev data/lang_test_bg exp/tri4_nnet/decode_dev \
    exp/sgmm2_4_mmi_b0.1/decode_dev_it$iter exp/combine_2/decode_dev_it$iter

  local/score_combine.sh --cmd "$decode_cmd" \
    data/test data/lang_test_bg exp/tri4_nnet/decode_test \
    exp/sgmm2_4_mmi_b0.1/decode_test_it$iter exp/combine_2/decode_test_it$iter
done


echo ============================================================================
echo " Getting Results [see RESULTS file] "
echo ============================================================================

@@ -142,8 +215,8 @@ for x in exp/*/decode*; do
  [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh
done

exit 0;



echo ============================================================================
echo "Finished successfully on" `date`
echo ============================================================================

exit 0
@@ -13,6 +13,7 @@ stage=0
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40

align_to_lats=false # optionally produce alignment in lattice format
lats_decode_opts="--acoustic-scale=0.1 --beam=20 --latbeam=10"
lats_graph_scales="--transition-scale=1.0 --self-loop-scale=0.1"

@@ -48,22 +49,19 @@ sdata=$data/split$nj

cp $srcdir/{tree,final.mdl} $dir || exit 1;

#Get the files we will need
# Select default locations to model files
nnet=$srcdir/final.nnet;
[ ! -s "$nnet" ] && echo "Missing nnet '$nnet'" && exit 1;

class_frame_counts=$srcdir/ali_train_pdf.counts
[ ! -s "$class_frame_counts" ] && echo "Missing class_frame_counts '$class_frame_counts'" && exit 1;

feature_transform=$srcdir/final.feature_transform
[ ! -s $feature_transform ] && echo "Missing feature_transform '$feature_transform'" && exit 1

model=$dir/final.mdl
[ ! -s "$model" ] && echo "Missing transition-model '$model'" && exit 1;

###
### Prepare feature pipeline (same as for decoding)
###
# Check that files exist
for f in $sdata/1/feats.scp $sdata/1/text $lang/L.fst $nnet $model $feature_transform $class_frame_counts; do
  [ ! -f $f ] && echo "$0: missing file $f" && exit 1;
done


# PREPARE FEATURE EXTRACTION PIPELINE
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn

@@ -77,15 +75,11 @@ if [ -f $srcdir/delta_order ]; then
  delta_order=$(cat $srcdir/delta_order)
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

# Finally add feature_transform and the MLP
feats="$feats nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet ark:- ark:- |"
###
###
###

echo "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments in '$dir'"


echo "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments in '$dir'"
# Map oovs in reference transcription
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
# We could just use align-mapped in the next line, but it's less efficient as it compiles the
@@ -87,8 +87,8 @@ cp $cmvndir/cmvn_$name.scp $data/cmvn.scp || exit 1;
nc=`cat $data/cmvn.scp | wc -l`
nu=`cat $data/spk2utt | wc -l`
if [ $nc -ne $nu ]; then
  echo "Error: it seems not all of the speakers got cmvn stats ($nc != $nu);"
  exit 1;
  echo "$0: warning: it seems not all of the speakers got cmvn stats ($nc != $nu);"
  [ $nc -eq 0 ] && exit 1;
fi

echo "Succeeded creating CMVN stats for $name"
@@ -3,25 +3,29 @@
# Copyright 2012-2013 Karel Vesely, Daniel Povey
# Apache 2.0

# Begin configuration section.
nnet= # Optionally pre-select network to use for getting state-likelihoods
feature_transform= # Optionally pre-select feature transform (in front of nnet)
model= # Optionally pre-select transition model
class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors
# Begin configuration section.
nnet= # non-default location of DNN (optional)
feature_transform= # non-default location of feature_transform (optional)
model= # non-default location of transition model (optional)
class_frame_counts= # non-default location of PDF counts (optional)
srcdir= # non-default location of DNN-dir (decouples model dir from decode dir)

stage=0 # stage=1 skips lattice generation
nj=4
cmd=run.pl
max_active=7000 # maximum of active tokens
max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized)
beam=13.0 # GMM:13.0
latbeam=8.0 # GMM:6.0
acwt=0.10 # GMM:0.0833, note: only really affects pruning (scoring is on lattices).
scoring_opts="--min-lmwt 4 --max-lmwt 15"

acwt=0.10 # note: only really affects pruning (scoring is on lattices).
beam=13.0
latbeam=8.0
max_active=7000 # limit of active tokens
max_mem=50000000 # approx. limit to memory consumption during minimization in bytes

skip_scoring=false
use_gpu_id=-1 # disable gpu
parallel_opts="-pe smp 2" # use 2 CPUs (1 DNN-forward, 1 decoder)
srcdir= # optionally select dir with DNN model
scoring_opts="--min-lmwt 4 --max-lmwt 15"

num_threads=1 # if >1, will use latgen-faster-parallel
parallel_opts="-pe smp $((num_threads+1))" # use 2 CPUs (1 DNN-forward, 1 decoder)
use_gpu_id=-1 # -1 disable gpu
# End configuration section.

echo "$0 $@" # Print the command line for logging

@@ -32,7 +36,7 @@ echo "$0 $@" # Print the command line for logging
if [ $# != 3 ]; then
  echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
  echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
  echo " where the DNN + transition model is."
  echo " where the DNN and transition model is."
  echo "e.g.: $0 exp/dnn1/graph_tgpr data/test exp/dnn1/decode_tgpr"
  echo ""
  echo "This script works on plain or modified features (CMN,delta+delta-delta),"

@@ -44,13 +48,13 @@ if [ $# != 3 ]; then
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo ""
  echo " --nnet <nnet> # which nnet to use (opt.)"
  echo " --feature-transform <nnet> # select transform in front of nnet (opt.)"
  echo " --class-frame-counts <file> # file with frame counts (used to compute priors) (opt.)"
  echo " --model <model> # which transition model to use (opt.)"
  echo " --nnet <nnet> # non-default location of DNN (opt.)"
  echo " --srcdir <dir> # non-default dir with DNN/models, can be different"
  echo " # from parent dir of <decode-dir>' (opt.)"
  echo ""
  echo " --acwt <float> # select acoustic scale for decoding"
  echo " --scoring-opts <opts> # options forwarded to local/score.sh"
  echo " --num-threads <N> # N>1: run multi-threaded decoder"
  exit 1;
fi

@@ -58,43 +62,31 @@ fi
graphdir=$1
data=$2
dir=$3
[ -z $srcdir ] && srcdir=`dirname $dir`; # Or back-off to: model directory one level up from decoding directory.
[ -z $srcdir ] && srcdir=`dirname $dir`; # Default model directory one level up from decoding directory.
sdata=$data/split$nj;

mkdir -p $dir/log

[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs

if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
  nnet=$srcdir/final.nnet;
fi
[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
# Select default locations to model files (if not already set externally)
if [ -z "$nnet" ]; then nnet=$srcdir/final.nnet; fi
if [ -z "$model" ]; then model=$srcdir/final.mdl; fi
if [ -z "$feature_transform" ]; then feature_transform=$srcdir/final.feature_transform; fi
if [ -z "$class_frame_counts" ]; then class_frame_counts=$srcdir/ali_train_pdf.counts; fi

if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
  model=$srcdir/final.mdl;
fi

# find the feature_transform to use
if [ -z "$feature_transform" ]; then
  feature_transform=$srcdir/final.feature_transform
fi
if [ ! -f $feature_transform ]; then
  echo "Missing feature_transform '$feature_transform'"
  exit 1
fi

# check that files exist
for f in $sdata/1/feats.scp $nnet_i $nnet $model $graphdir/HCLG.fst; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
# Check that files exist
for f in $sdata/1/feats.scp $nnet $model $feature_transform $class_frame_counts $graphdir/HCLG.fst; do
  [ ! -f $f ] && echo "$0: missing file $f" && exit 1;
done

# PREPARE THE LOG-POSTERIOR COMPUTATION PIPELINE
if [ -z "$class_frame_counts" ]; then
  class_frame_counts=$srcdir/ali_train_pdf.counts
else
  echo "Overriding class_frame_counts by $class_frame_counts"
fi
# Possibly use multi-threaded decoder
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"


# PREPARE FEATURE EXTRACTION PIPELINE
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn

@@ -109,13 +101,12 @@ if [ -f $srcdir/delta_order ]; then
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi


# Run the decoding in the queue
if [ $stage -le 0 ]; then
  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
    nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet "$feats" ark:- \| \
    latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    latgen-faster-mapped$thread_string --max-active=$max_active --max-mem=$max_mem --beam=$beam \
    --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $model $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi

@@ -79,28 +79,21 @@ else
fi



#Get the files we will need
cp $srcdir/{tree,final.mdl} $dir

# Select default locations to model files
[ -z "$nnet" ] && nnet=$srcdir/final.nnet;
[ ! -f "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;

class_frame_counts=$srcdir/ali_train_pdf.counts
[ -z "$class_frame_counts" ] && echo "Error class_frame_counts '$class_frame_counts' does not exist!" && exit 1;

feature_transform=$srcdir/final.feature_transform
if [ ! -f $feature_transform ]; then
  echo "Missing feature_transform '$feature_transform'"
  exit 1
fi

model=$dir/final.mdl
[ -z "$model" ] && echo "Error transition model '$model' does not exist!" && exit 1;

###
### Prepare feature pipeline (same as for decoding)
###
# Check that files exist
for f in $sdata/1/feats.scp $nnet $model $feature_transform $class_frame_counts; do
  [ ! -f $f ] && echo "$0: missing file $f" && exit 1;
done


# PREPARE FEATURE EXTRACTION PIPELINE
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn

@@ -114,28 +107,23 @@ if [ -f $srcdir/delta_order ]; then
  delta_order=$(cat $srcdir/delta_order)
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

# Finally add feature_transform and the MLP
feats="$feats nnet-forward --feature-transform=$feature_transform --no-softmax=true --class-frame-counts=$class_frame_counts --use-gpu-id=$use_gpu_id $nnet ark:- ark:- |"
###
###
###



###
### We will produce lattices, where the correct path is not necessarily present
###

#1) We don't use reference path here...

echo "Generating the denlats"
#2) Generate the denominator lattices
if [ $sub_split -eq 1 ]; then
echo "$0: generating denlats from data '$data', putting lattices in '$dir'"
#1) Generate the denominator lattices
if [ $sub_split -eq 1 ]; then
  # Prepare 'scp' for storing lattices separately and gzipped
  for n in `seq $nj`; do
    [ ! -d $dir/lat$n ] && mkdir $dir/lat$n;
    cat $sdata/$n/feats.scp | awk '{ print $1" | gzip -c >'$dir'/lat'$n'/"$1".gz"; }'
  done >$dir/lat.store_separately_as_gz.scp
  # Generate the lattices
  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
    latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
    --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
    $dir/dengraph/HCLG.fst "$feats" "ark,scp:$dir/lat.JOB.ark,$dir/lat.JOB.scp" || exit 1;
    $dir/dengraph/HCLG.fst "$feats" "scp:$dir/lat.store_separately_as_gz.scp" || exit 1;
else
  for n in `seq $nj`; do
    if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then

@@ -146,32 +134,25 @@ else
      split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
    fi
    mkdir -p $dir/log/$n
    mkdir -p $dir/part
    feats_subset=$(echo $feats | sed s:JOB/:$n/split$sub_split/JOB/:g)
    # Prepare 'scp' for storing lattices separately and gzipped
    for k in `seq $sub_split`; do
      [ ! -d $dir/lat$n/$k ] && mkdir -p $dir/lat$n/$k;
      cat $sdata2/$k/feats.scp | awk '{ print $1" | gzip -c >'$dir'/lat'$n'/'$k'/"$1".gz"; }'
    done >$dir/lat.$n.store_separately_as_gz.scp
    # Generate lattices
    $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
      latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
      --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
      $dir/dengraph/HCLG.fst "$feats_subset" "ark,scp:$dir/lat.$n.JOB.ark,$dir/lat.$n.JOB.scp" || exit 1;
    echo Merging lists for data subset $n
    for k in `seq $sub_split`; do
      cat $dir/lat.$n.$k.scp
    done > $dir/lat.$n.all.scp
    echo Merge the ark $n
    lattice-copy scp:$dir/lat.$n.all.scp ark,scp:$dir/lat.$n.ark,$dir/lat.$n.scp || exit 1;
    #remove the data
    rm $dir/lat.$n.*.ark $dir/lat.$n.*.scp $dir/lat.$n.all.scp
      $dir/dengraph/HCLG.fst "$feats_subset" scp:$dir/lat.$n.store_separately_as_gz.scp || exit 1;
    touch $dir/.done.$n
    fi
  done
fi



#3) Merge the SCPs to create full list of lattices (will use random access)
echo Merging to single list $dir/lat.scp
for ((n=1; n<=nj; n++)); do
  cat $dir/lat.$n.scp
done > $dir/lat.scp

#2) Generate 'scp' for reading the lattices
for n in `seq $nj`; do
  find $dir/lat${n} -name "*.gz" | awk -v FS="/" '{ print gensub(".gz","","",$NF)" gunzip -c "$0" |"; }'
done >$dir/lat.scp

echo "$0: done generating denominator lattices."

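As an aside, a hedged sketch of the "store separately and gzipped" trick above (the utterance-id and directory are invented for illustration): the write-side scp entry is treated by the Kaldi wspecifier as an output pipe, and the read-side scp rebuilt by the find/awk step reads it back through gunzip:

    # one write-side line of lat.store_separately_as_gz.scp
    utt_001 | gzip -c >exp/mono_denlats/lat1/utt_001.gz
    # the matching read-side line of lat.scp
    utt_001 gunzip -c exp/mono_denlats/lat1/utt_001.gz |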
@@ -51,7 +51,7 @@ realign_iters=""
beam=10 # for realignment.
retry_beam=40
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know.
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
nnet_config_opts=
splice_width=4 # meaning +- 4 frames on each side for second LDA

@@ -110,8 +110,9 @@ if [ $# != 4 ]; then
  echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
  echo " # as well as speed; may interact with batch size; if you increase"
  echo " # this, you may want to decrease the batch size."
  echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
  echo " # use multiple threads."
  echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
  echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
  echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
  echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
  echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
  echo " # should not get too large, e.g. >2k)."
@@ -35,6 +35,7 @@ for ((n=1; n<$#; n++)); do
  if [ "${!n}" == "--config" ]; then
    n_plus1=$((n+1))
    config=${!n_plus1}
    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
    . $config # source the config file.
  fi
done
@@ -86,7 +86,8 @@ mkdir -p $dir $tmpdir $dir/phones

[ -f path.sh ] && . ./path.sh

! utils/validate_dict_dir.pl $srcdir && echo "*Error validating directory $srcdir*" && exit 1;
! utils/validate_dict_dir.pl $srcdir && \
  echo "*Error validating directory $srcdir*" && exit 1;

if [[ ! -f $srcdir/lexicon.txt ]]; then
  echo "**Creating $dir/lexicon.txt from $dir/lexiconp.txt"

@@ -97,8 +98,12 @@ if [[ ! -f $srcdir/lexiconp.txt ]]; then
  perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $srcdir/lexiconp.txt || exit 1;
fi

! utils/validate_dict_dir.pl $srcdir >&/dev/null && \
  echo "Validation failed (second time)" && exit 1;
if ! utils/validate_dict_dir.pl $srcdir >&/dev/null; then
  utils/validate_dict_dir.pl $srcdir # show the output.
  echo "Validation failed (second time)"
  exit 1;
fi


if $position_dependent_phones; then
  # Create $tmpdir/lexicon.original from $srcdir/lexicon.txt by
@@ -163,8 +163,8 @@ print Q "EOF\n"; # without having to escape things like "|" and quote characters
print Q ") >$logfile\n";
print Q "time1=\`date +\"%s\"\`\n";
print Q " ( $cmd ) 2>>$logfile >>$logfile\n";
print Q "time2=\`date +\"%s\"\`\n";
print Q "ret=\$?\n";
print Q "time2=\`date +\"%s\"\`\n";
print Q "echo '#' Accounting: time=\$((\$time2-\$time1)) threads=$nof_threads >>$logfile\n";
print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n";
print Q "[ \$ret -eq 137 ] && exit 100;\n"; # If process was killed (e.g. oom) it will exit with status 137;

@@ -232,7 +232,7 @@ if (! $sync) { # We're not submitting with -sync y, so we
  # Check that the job exists in SGE. Job can be killed if duration
  # exceeds some hard limit, or in case of a machine shutdown.
  if(($check_sge_job_ctr++ % 10) == 0) { # Don't run qstat too often, avoid stress on SGE.
    if ( -f $f ) { next; }; #syncfile appeared, ok
    if ( -f $f ) { next; }; #syncfile appeared: OK.
    $ret = system("qstat -j $sge_job_id >/dev/null 2>/dev/null");
    if($ret != 0) {
      # Don't consider immediately missing job as error, first wait some

@@ -245,7 +245,7 @@ if (! $sync) { # We're not submitting with -sync y, so we
      if ( -f $f ) { next; }; #syncfile appeared, ok
      #Otherwise it is an error
      if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
      print STDERR "queue.pl: Error, unfinished job no longer exists, log is in $logfile\n";
      print STDERR "queue.pl: Error, unfinished job no longer exists, log is in $logfile, syncfile is $f, return status of qstat was $ret\n";
      print STDERR " Possible reasons: a) Exceeded time limit? -> Use more jobs! b) Shutdown/Frozen machine? -> Run again!\n";
      exit(1);
    }
@@ -195,7 +195,7 @@ if ( (-f "$dict/lexicon.txt") && (-f "$dict/lexiconp.txt")) {

# Checking extra_questions.txt -------------------------------
print "Checking $dict/extra_questions.txt ...\n";
if(-s "$dict/extra_questions.txt") {
if (-s "$dict/extra_questions.txt") {
  if(!open(EX, "<$dict/extra_questions.txt")) {$exit = 1; print "--> ERROR: fail to open $dict/extra_questions.txt\n";}
  $idx = 1;
  $success = 1;

@@ -213,6 +213,8 @@ if(-s "$dict/extra_questions.txt") {
  }
  close(EX);
  $success == 0 || print "--> $dict/extra_questions.txt is OK\n";
} else {print "--> $dict/extra_phones.txt is empty\n";}
} else { print "--> $dict/extra_questions.txt is empty (this is OK)\n";}

if($exit == 1) { print " [Error detected ]\n"; exit 1;}

exit 0;
@@ -141,6 +141,6 @@ preprocessor variables, setting compile options, linking with libraries, and so
\section build_setup_platforms Which platforms has Kaldi been compiled on?

We have compiled Kaldi on Windows, Cygwin, various flavors of Linux (including
Ubuntu and CentOS), and Darwin.
Ubuntu, CentOS and SUSE), and Darwin.

*/
@@ -69,7 +69,7 @@ int main(int argc, char *argv[]) {

      SlidingWindowCmn(opts, feat, &cmvn_feat);

      feat_writer.Write(utt, feat);
      feat_writer.Write(utt, cmvn_feat);
      num_done++;
    }
@@ -47,7 +47,7 @@ template<class IntType> class LatticeStringRepository {
    inline bool operator == (const Entry &other) const {
      return (parent == other.parent && i == other.i);
    }
    Entry(const Entry *parent, IntType i): parent(parent), i(i) {}
    Entry() { }
    Entry(const Entry &e): parent(e.parent), i(e.i) {}
  };
  // Note: all Entry* pointers returned in function calls are
@@ -59,16 +59,22 @@ template<class IntType> class LatticeStringRepository {
  // Returns string of "parent" with i appended. Pointer
  // owned by repository
  const Entry *Successor(const Entry *parent, IntType i) {
    Entry entry(parent, i);
    typename SetType::iterator iter = set_.find(&entry);
    if(iter == set_.end()) { // no such entry already...
      Entry *entry_ptr = new Entry(entry);
      set_.insert(entry_ptr);
      return entry_ptr;
    } else {
      return *iter;
    new_entry_->parent = parent;
    new_entry_->i = i;

    std::pair<typename SetType::iterator, bool> pr = set_.insert(new_entry_);
    if (pr.second) { // Was successfully inserted (was not there). We need to
                     // replace the pre-allocated element we just handed to the
                     // set (which now owns it) with a freshly allocated one.
      const Entry *ans = new_entry_;
      new_entry_ = new Entry();
      return ans;
    } else { // Was not inserted because an equivalent Entry already
             // existed.
      return *pr.first;
    }
  }

  const Entry *Concatenate (const Entry *a, const Entry *b) {
    if (a == NULL) return b;
    else if (b == NULL) return a;
@@ -94,15 +100,22 @@ template<class IntType> class LatticeStringRepository {
  // a common prefix with a.
  void ReduceToCommonPrefix(const Entry *a,
                            vector<IntType> *b) {
    vector<IntType> a_vec;
    ConvertToVector(a, &a_vec);
    if (b->size() > a_vec.size())
      b->resize(a_vec.size());
    size_t b_sz = 0, max_sz = std::min(a_vec.size(), b->size());
    while (b_sz < max_sz && (*b)[b_sz] == a_vec[b_sz])
      b_sz++;
    if (b_sz != b->size())
      b->resize(b_sz);
    size_t a_size = Size(a), b_size = b->size();
    while (a_size > b_size) {
      a = a->parent;
      a_size--;
    }
    if (b_size > a_size)
      b_size = a_size;
    typename vector<IntType>::iterator b_begin = b->begin();
    while (a_size != 0) {
      if (a->i != *(b_begin + a_size - 1))
        b_size = a_size - 1;
      a = a->parent;
      a_size--;
    }
    if (b_size != b->size())
      b->resize(b_size);
  }

  // removes the first n elements of a.
@@ -156,8 +169,8 @@ template<class IntType> class LatticeStringRepository {
    return e;
  }

  LatticeStringRepository() { }

  LatticeStringRepository() { new_entry_ = new Entry; }

  void Destroy() {
    for (typename SetType::iterator iter = set_.begin();
         iter != set_.end();
@@ -165,6 +178,10 @@ template<class IntType> class LatticeStringRepository {
      delete *iter;
    SetType tmp;
    tmp.swap(set_);
    if (new_entry_) {
      delete new_entry_;
      new_entry_ = NULL;
    }
  }

  // Rebuild will rebuild this object, guaranteeing only

@@ -220,6 +237,8 @@ template<class IntType> class LatticeStringRepository {
  }

  DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository);
  Entry *new_entry_; // We always have a pre-allocated Entry ready to use,
                     // to avoid unnecessary news and deletes.
  SetType set_;

};
@@ -310,7 +310,7 @@ inline int Compare (const LatticeWeightTpl<FloatType> &w1,

template<class FloatType>
inline LatticeWeightTpl<FloatType> Plus(const LatticeWeightTpl<FloatType> &w1,
                                        const LatticeWeightTpl<FloatType> &w2) {
                                        const LatticeWeightTpl<FloatType> &w2) {
  return (Compare(w1, w2) >= 0 ? w1 : w2);
}

@@ -123,7 +123,7 @@ template<class Arc> VectorFst<Arc>* RandPairFst(RandFstOptions opts = RandFstOpt
    all_states.push_back(this_state);
  }
  // Set final states.
  for (size_t j = 0;j < (size_t)opts.n_final;j++) {
  for (size_t j = 0; j < (size_t)opts.n_final;j++) {
    StateId id = all_states[rand() % opts.n_states];
    Weight weight (opts.weight_multiplier*(rand() % 5), opts.weight_multiplier*(rand() % 5));
    fst->SetFinal(id, weight);
@@ -54,10 +54,10 @@ void DiagGmmNormal::CopyFromDiagGmm(const DiagGmm &diaggmm) {
  means_.MulElements(vars_);
}

void DiagGmmNormal::CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags) {
void DiagGmmNormal::CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags) const {
  KALDI_ASSERT((static_cast<int32>(diaggmm->Dim()) == means_.NumCols())
               && (static_cast<int32>(diaggmm->weights_.Dim()) == weights_.Dim()));

               && (static_cast<int32>(diaggmm->weights_.Dim()) == weights_.Dim()));

  DiagGmmNormal oldg(*diaggmm);

  if (flags & kGmmWeights)
@@ -58,7 +58,7 @@ class DiagGmmNormal {
  void CopyFromDiagGmm(const DiagGmm &diaggmm);

  /// Copies to DiagGmm the requested parameters
  void CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags = kGmmAll);
  void CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags = kGmmAll) const;

  int32 NumGauss() { return weights_.Dim(); }
  int32 Dim() { return means_.NumCols(); }
@@ -821,6 +821,10 @@ BaseFloat DiagGmm::GaussianSelectionPreselect(
  return tot_loglike;
}

void DiagGmm::CopyFromNormal(const DiagGmmNormal &diag_gmm_normal) {
  diag_gmm_normal.CopyToDiagGmm(this);
}

void DiagGmm::Generate(VectorBase<BaseFloat> *output) {
  KALDI_ASSERT(static_cast<int32>(output->Dim()) == Dim());
  BaseFloat tot = weights_.Sum();
@@ -51,6 +51,9 @@ class DiagGmm {
    CopyFromDiagGmm(gmm);
  }

  /// Copies from DiagGmmNormal; does not resize.
  void CopyFromNormal(const DiagGmmNormal &diag_gmm_normal);

  DiagGmm(int32 nMix, int32 dim): valid_gconsts_(false) { Resize(nMix, dim); }

  /// Constructor that allows us to merge GMMs with weights. Weights must sum
@@ -26,7 +26,7 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
  gmm-diff-accs gmm-basis-fmllr-accs gmm-basis-fmllr-training gmm-est-basis-fmllr \
  gmm-est-map gmm-adapt-map gmm-latgen-map gmm-basis-fmllr-accs-gpost \
  gmm-est-basis-fmllr-gpost gmm-latgen-tracking gmm-latgen-faster-parallel \
  gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost
  gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats

OBJFILES =
@@ -0,0 +1,180 @@
// gmmbin/gmm-global-init-from-feats.cc

// Copyright 2013 Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/model-common.h"
#include "gmm/full-gmm.h"
#include "gmm/diag-gmm.h"
#include "gmm/mle-full-gmm.h"

namespace kaldi {

// We initialize the GMM parameters by setting the variance to the global
// variance of the features, and the means to distinct randomly chosen frames.
void InitGmmFromRandomFrames(const Matrix<BaseFloat> &feats, DiagGmm *gmm) {
  int32 num_gauss = gmm->NumGauss(), num_frames = feats.NumRows(),
      dim = feats.NumCols();
  KALDI_ASSERT(num_frames >= 10 * num_gauss && "Too few frames to train on");
  Vector<double> mean(dim), var(dim);
  for (int32 i = 0; i < num_frames; i++) {
    mean.AddVec(1.0 / num_frames, feats.Row(i));
    var.AddVec2(1.0 / num_frames, feats.Row(i));
  }
  var.AddVec2(-1.0, mean);
  if (var.Max() <= 0.0)
    KALDI_ERR << "Features do not have positive variance " << var;

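  // At this point mean holds E[x] and var holds E[x^2] - (E[x])^2, i.e. the
  // (biased) global variance of the stored frames, per dimension; each
  // Gaussian below is seeded with this shared variance.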
  DiagGmmNormal gmm_normal(*gmm);

  std::set<int32> used_frames;
  for (int32 g = 0; g < num_gauss; g++) {
    int32 random_frame = RandInt(0, num_frames - 1);
    while (used_frames.count(random_frame) != 0)
      random_frame = RandInt(0, num_frames - 1);
    used_frames.insert(random_frame);
    gmm_normal.weights_(g) = 1.0 / num_gauss;
    gmm_normal.means_.Row(g).CopyFromVec(feats.Row(random_frame));
    gmm_normal.vars_.Row(g).CopyFromVec(var);
  }
  gmm->CopyFromNormal(gmm_normal);
  gmm->ComputeGconsts();
}

void TrainOneIter(const Matrix<BaseFloat> &feats,
                  const MleDiagGmmOptions &gmm_opts,
                  int32 iter,
                  DiagGmm *gmm) {
  AccumDiagGmm gmm_acc(*gmm, kGmmAll);

  double tot_like = 0.0;

  for (int32 t = 0; t < feats.NumRows(); t++)
    tot_like += gmm_acc.AccumulateFromDiag(*gmm, feats.Row(t), 1.0);

  KALDI_LOG << "Likelihood per frame on iteration " << iter
            << " was " << (tot_like / feats.NumRows()) << " over "
            << feats.NumRows() << " frames.";

  BaseFloat objf_change, count;
  MleDiagGmmUpdate(gmm_opts, gmm_acc, kGmmAll, gmm, &objf_change, &count);

  KALDI_LOG << "Objective-function change on iteration " << iter << " was "
            << (objf_change / count) << " over " << count << " frames.";
}

} // namespace kaldi

int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;

    const char *usage =
        "This program initializes a single diagonal GMM and does multiple iterations of\n"
        "training from features stored in memory.\n"
        "Usage: gmm-global-init-from-feats [options] <feature-rspecifier> <model-out>\n"
        "e.g.: gmm-global-init-from-feats scp:train.scp 1.mdl\n";

    ParseOptions po(usage);
    MleDiagGmmOptions gmm_opts;

    bool binary = true;
    int32 num_gauss = 100;
    int32 num_iters = 50;
    int32 num_frames = 200000;
    int32 srand_seed = 0;

    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("num-gauss", &num_gauss, "Number of Gaussians in the model");
    po.Register("num-iters", &num_iters, "Number of iterations of training");
    po.Register("num-frames", &num_frames, "Number of feature vectors to store in "
                "memory and train on (randomly chosen from the input features)");
    po.Register("srand", &srand_seed, "Seed for random number generator ");

    gmm_opts.Register(&po);

    po.Read(argc, argv);

    srand(srand_seed);

    if (po.NumArgs() != 2) {
      po.PrintUsage();
      exit(1);
    }

    std::string feature_rspecifier = po.GetArg(1),
        model_wxfilename = po.GetArg(2);

    Matrix<BaseFloat> feats;

    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);

    KALDI_ASSERT(num_frames > 0);

    int64 num_read = 0, dim = 0;

    KALDI_LOG << "Reading features (will keep " << num_frames << " frames).";

    for (; !feature_reader.Done(); feature_reader.Next()) {
      const Matrix<BaseFloat> &this_feats = feature_reader.Value();
      for (int32 t = 0; t < this_feats.NumRows(); t++) {
        num_read++;
        if (dim == 0) {
          dim = this_feats.NumCols();
          feats.Resize(num_frames, dim);
        } else if (this_feats.NumCols() != dim) {
          KALDI_ERR << "Features have inconsistent dims "
                    << this_feats.NumCols() << " vs. " << dim
                    << " (current utt is) " << feature_reader.Key();
        }
        if (num_read <= num_frames) {
          feats.Row(num_read - 1).CopyFromVec(this_feats.Row(t));
        } else {
          BaseFloat keep_prob = num_frames / static_cast<BaseFloat>(num_read);
          if (WithProb(keep_prob)) { // With probability "keep_prob"
            feats.Row(RandInt(0, num_frames - 1)).CopyFromVec(this_feats.Row(t));
          }
        }
      }
    }

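    // The else-branch above is reservoir sampling: once the num_frames-row
    // buffer is full, the n-th frame is kept with probability num_frames / n
    // and overwrites a uniformly chosen slot, so every frame seen so far
    // remains in the buffer with equal probability.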
    if (num_read < num_frames) {
      KALDI_WARN << "Number of frames read " << num_read << " was less than "
                 << "target number " << num_frames << ", using all we read.";
      feats.Resize(num_read, dim, kCopyData);
    }

    DiagGmm gmm(num_gauss, dim);

    KALDI_LOG << "Initializing GMM means from random frames";
    InitGmmFromRandomFrames(feats, &gmm);

    for (int32 iter = 0; iter < num_iters; iter++)
      TrainOneIter(feats, gmm_opts, iter, &gmm);

    WriteKaldiObject(gmm, model_wxfilename, binary);
    KALDI_LOG << "Wrote model to " << model_wxfilename;
    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what();
    return -1;
  }
}
@@ -5,11 +5,11 @@ include ../kaldi.mk

EXTRA_CXXFLAGS += -Wno-sign-compare

TESTFILES = kaldi-lattice-test
TESTFILES = kaldi-lattice-test push-lattice-test minimize-lattice-test

OBJFILES = kaldi-lattice.o lattice-functions.o word-align-lattice.o \
           phone-align-lattice.o word-align-lattice-lexicon.o sausages.o \
           kws-functions.o push-lattice.o
           kws-functions.o push-lattice.o minimize-lattice.o

LIBNAME = kaldi-lat
@@ -311,6 +311,7 @@ void ConvertLatticeToPhones(const TransitionModel &trans,
void ConvertCompactLatticeToPhones(const TransitionModel &trans,
                                   CompactLattice *clat) {
  typedef CompactLatticeArc Arc;
  typedef Arc::Weight Weight;
  int32 num_states = clat->NumStates();
  for (int32 state = 0; state < num_states; state++) {
    for (fst::MutableArcIterator<CompactLattice> aiter(clat, state);

@@ -327,6 +328,18 @@ void ConvertCompactLatticeToPhones(const TransitionModel &trans,
      arc.weight.SetString(phone_seq);
      aiter.SetValue(arc);
    } // end looping over arcs
    Weight f = clat->Final(state);
    if (f != Weight::Zero()) {
      std::vector<int32> phone_seq;
      const std::vector<int32> &tid_seq = f.String();
      for (std::vector<int32>::const_iterator iter = tid_seq.begin();
           iter != tid_seq.end(); ++iter) {
        if (trans.IsFinal(*iter)) // note: there is one of these per phone...
          phone_seq.push_back(trans.TransitionIdToPhone(*iter));
      }
      f.SetString(phone_seq);
      clat->SetFinal(state, f);
    }
  } // end looping over states
}

@@ -0,0 +1,72 @@
// lat/minimize-lattice-test.cc

// Copyright 2013 Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "lat/kaldi-lattice.h"
#include "lat/minimize-lattice.h"
#include "lat/push-lattice.h"
#include "fstext/rand-fst.h"


namespace kaldi {
using namespace fst;

CompactLattice *RandDeterministicCompactLattice() {
  RandFstOptions opts;
  opts.acyclic = true;
  while (1) {
    Lattice *fst = fst::RandPairFst<LatticeArc>(opts);
    CompactLattice *cfst = new CompactLattice;
    if (!DeterminizeLattice(*fst, cfst)) {
      delete fst;
      delete cfst;
      KALDI_WARN << "Determinization failed, trying again.";
    } else {
      delete fst;
      return cfst;
    }
  }
}

void TestMinimizeCompactLattice() {
  CompactLattice *clat = RandDeterministicCompactLattice();
  CompactLattice clat2(*clat);
  BaseFloat delta = (rand() % 2 == 0 ? 1.0 : 1.0e-05);

  // Minimization will only work well on determinized and pushed lattices.
  PushCompactLatticeStrings(&clat2);
  PushCompactLatticeWeights(&clat2);

  MinimizeCompactLattice(&clat2, delta);
  KALDI_ASSERT(fst::RandEquivalent(*clat, clat2, 5, delta, rand(), 10));

  delete clat;
}


} // end namespace kaldi

int main() {
  using namespace kaldi;
  SetVerboseLevel(4);
  for (int32 i = 0; i < 1000; i++) {
    TestMinimizeCompactLattice();
  }
  KALDI_LOG << "Success.";
}
@ -0,0 +1,283 @@
|
|||
// lat/minimize-lattice.cc
|
||||
|
||||
// Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal)
|
||||
// 2012-2013 Johns Hopkins University (Author: Daniel Povey); Chao Weng;
|
||||
// Bagher BabaAli
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "lat/minimize-lattice.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "util/stl-utils.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
/*
|
||||
Process the states in reverse topological order.
|
||||
For each state, compute a hash-value that will be the same for states
|
||||
that can be combined. Then for each pair of states with the
|
||||
same hash value, check that the "to-states" map to the
|
||||
same equivalence class and that the weights are sufficiently similar.
|
||||
*/
|
||||
|
||||
|
||||
using fst::ArcIterator;
|
||||
using fst::MutableArcIterator;
|
||||
using fst::kNoStateId;
|
||||
|
||||
class CompactLatticeMinimizer {
|
||||
public:
|
||||
typedef CompactLattice::StateId StateId;
|
||||
typedef CompactLatticeArc Arc;
|
||||
typedef Arc::Label Label;
|
||||
typedef CompactLatticeWeight Weight;
|
||||
typedef size_t HashType;
|
||||
|
||||
CompactLatticeMinimizer(CompactLattice *clat, float delta = fst::kDelta):
|
||||
clat_(clat), delta_(delta) { }
|
||||
|
||||
bool Minimize() {
|
||||
if (clat_->Properties(fst::kTopSorted, true) == 0) {
|
||||
if (!TopSort(clat_)) {
|
||||
KALDI_WARN << "Topological sorting of state-level lattice failed "
|
||||
"(probably your lexicon has empty words or your LM has epsilon cycles; this "
|
||||
" is a bad idea.)";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
ComputeStateHashValues();
|
||||
ComputeStateMap();
|
||||
ModifyModel();
|
||||
return true;
|
||||
}
|
||||
|
||||
static HashType ConvertStringToHashValue(const std::vector<int32> &vec) {
|
||||
const HashType prime = 53281;
|
||||
VectorHasher<int32> h;
|
||||
HashType ans = static_cast<HashType>(h(vec));
|
||||
if (ans == 0) ans = prime;
|
||||
// We don't allow a zero answer, as this can cause too many values to be the
|
||||
// same.
|
||||
return ans;
|
||||
}
|
||||
|
||||
static void InitHashValue(const Weight &final_weight, HashType *h) {
|
||||
const HashType prime1 = 33317, prime2 = 607; // it's pretty random.
|
||||
if (final_weight == Weight::Zero()) *h = prime1;
|
||||
else *h = prime2 * ConvertStringToHashValue(final_weight.String());
|
||||
}
|
||||
|
||||
// It's important that this function and UpdateHashValueForFinalProb be
|
||||
// insensitive to the order in which it's called, as the order of the arcs
|
||||
// won't necessarily be the same for different equivalent states.
|
||||
static void UpdateHashValueForTransition(const Weight &weight,
|
||||
Label label,
|
||||
const HashType &next_state_hash,
|
||||
HashType *h) {
|
||||
const HashType prime1 = 1447, prime2 = 51907;
|
||||
if (label == 0) label = prime2; // Zeros will cause problems.
|
||||
*h += prime1 * label *
|
||||
(1 + ConvertStringToHashValue(weight.String()) * next_state_hash);
|
||||
// Above, the "1 +" is to ensure that if somehow we get zeros due to
|
||||
// weird word sequences, they don't propagate.
|
||||
}
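// Note: the order-insensitivity required above holds because each call adds
// an independent term to *h, and addition commutes, so the order in which a
// state's arcs are visited does not affect the final hash value.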
|
||||
|
||||
void ComputeStateHashValues() {
|
||||
// Note: clat_ is topologically sorted, and StateId is
|
||||
// signed. Each state's hash value is only a function of topologically-later
|
||||
// states' hash values.
|
||||
state_hashes_.resize(clat_->NumStates());
|
||||
for (StateId s = clat_->NumStates() - 1; s >= 0; s--) {
|
||||
HashType this_hash;
|
||||
InitHashValue(clat_->Final(s), &this_hash);
|
||||
for (ArcIterator<CompactLattice> aiter(*clat_, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
const Arc &arc = aiter.Value();
|
||||
HashType next_hash;
|
||||
if (arc.nextstate > s) {
|
||||
next_hash = state_hashes_[arc.nextstate];
|
||||
} else {
|
||||
KALDI_ASSERT(s == arc.nextstate &&
|
||||
"Lattice not topologically sorted [code error]");
|
||||
next_hash = 1;
|
||||
KALDI_WARN << "Minimizing lattice with self-loops "
|
||||
"(lattices should not have self-loops)";
|
||||
}
|
||||
UpdateHashValueForTransition(arc.weight, arc.ilabel,
|
||||
next_hash, &this_hash);
|
||||
}
|
||||
state_hashes_[s] = this_hash;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
struct EquivalenceSorter {
|
||||
// This struct has an operator () which you can interpret as a less-than (<)
|
||||
// operator for arcs. We sort on ilabel; since the lattice is supposed to
|
||||
// be deterministic, this should completely determine the ordering (there
|
||||
// should not be more than one arc with the same ilabel, out of the same
|
||||
// state). For identical ilabels we next sort on the nextstate, simply to
|
||||
// better handle non-deterministic input (we do our best on this, without
|
||||
// guaranteeing full minimization). We could sort on the strings next, but
|
||||
// this would be an unnecessary hassle as we only really need good
|
||||
// performance on deterministic input.
|
||||
bool operator () (const Arc &a, const Arc &b) const {
|
||||
if (a.ilabel < b.ilabel) return true;
|
||||
else if (a.ilabel > b.ilabel) return false;
|
||||
else if (a.nextstate < b.nextstate) return true;
|
||||
else return false;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// This function works out whether s and t are equivalent, assuming
|
||||
// we have already partitioned all topologically-later states into
|
||||
// equivalence classes (i.e. set up state_map_).
|
||||
bool Equivalent(StateId s, StateId t) const {
|
||||
if (!ApproxEqual(clat_->Final(s), clat_->Final(t), delta_))
|
||||
return false;
|
||||
if (clat_->NumArcs(s) != clat_->NumArcs(t))
|
||||
return false;
|
||||
std::vector<Arc> s_arcs;
|
||||
std::vector<Arc> t_arcs;
|
||||
for (int32 iter = 0; iter <= 1; iter++) {
|
||||
StateId state = (iter == 0 ? s : t);
|
||||
std::vector<Arc> &arcs = (iter == 0 ? s_arcs : t_arcs);
|
||||
arcs.reserve(clat_->NumArcs(state));
|
||||
for (ArcIterator<CompactLattice> aiter(*clat_, state); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
Arc arc = aiter.Value();
|
||||
if (arc.nextstate == state) {
// This is a special case for states that have self-loops. If two
// states have an identical self-loop arc, they may be equivalent, so we
// keep the arc (with nextstate set to kNoStateId) for the comparison.
arc.nextstate = kNoStateId;
} else {
KALDI_ASSERT(arc.nextstate > state);
arc.nextstate = state_map_[arc.nextstate];
}
arcs.push_back(arc);
|
||||
}
|
||||
EquivalenceSorter sorter;
std::sort(arcs.begin(), arcs.end(), sorter);
|
||||
}
|
||||
KALDI_ASSERT(s_arcs.size() == t_arcs.size());
|
||||
for (size_t i = 0; i < s_arcs.size(); i++) {
|
||||
// Note: the nextstates were already mapped to their equivalence classes.
if (s_arcs[i].nextstate != t_arcs[i].nextstate) return false;
|
||||
KALDI_ASSERT(s_arcs[i].ilabel == s_arcs[i].olabel); // CompactLattices are
|
||||
// supposed to be
|
||||
// acceptors.
|
||||
if (s_arcs[i].ilabel != t_arcs[i].ilabel) return false;
|
||||
if (!ApproxEqual(s_arcs[i].weight, t_arcs[i].weight, delta_)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ComputeStateMap() {
|
||||
// We have to compute the state mapping in reverse topological order also,
|
||||
// since the equivalence test relies on later states being already sorted
|
||||
// out into equivalence classes (by state_map_).
|
||||
StateId num_states = clat_->NumStates();
|
||||
unordered_map<HashType, std::vector<StateId> > hash_groups_;
|
||||
|
||||
for (StateId s = 0; s < num_states; s++)
|
||||
hash_groups_[state_hashes_[s]].push_back(s);
|
||||
|
||||
state_map_.resize(num_states);
|
||||
for (StateId s = 0; s < num_states; s++)
|
||||
state_map_[s] = s; // Default mapping.
|
||||
|
||||
|
||||
{ // This block is just diagnostic.
|
||||
typedef unordered_map<HashType, std::vector<StateId> >::const_iterator
|
||||
HashIter;
|
||||
size_t max_size = 0;
|
||||
for (HashIter iter = hash_groups_.begin(); iter != hash_groups_.end();
|
||||
++iter)
|
||||
max_size = std::max(max_size, iter->second.size());
|
||||
if (max_size > 1000) {
|
||||
KALDI_WARN << "Largest equivalence group (using hash) is " << max_size
|
||||
<< ", minimization might be slow.";
|
||||
}
|
||||
}
|
||||
|
||||
for (StateId s = num_states - 1; s >= 0; s--) {
|
||||
HashType hash = state_hashes_[s];
|
||||
const std::vector<StateId> &equivalence_class = hash_groups_[hash];
|
||||
KALDI_ASSERT(!equivalence_class.empty());
|
||||
for (size_t i = 0; i < equivalence_class.size(); i++) {
|
||||
StateId t = equivalence_class[i];
|
||||
// Below, there is no point doing the test if state_map_[t] != t, because
// in that case we will, elsewhere in this loop, be comparing with another
// state that is equivalent to t.
|
||||
if (t > s && state_map_[t] == t && Equivalent(s, t)) {
|
||||
state_map_[s] = t;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ModifyModel() {
|
||||
// Modifies the lattice (clat_) according to state_map_.
|
||||
|
||||
StateId num_removed = 0;
|
||||
StateId num_states = clat_->NumStates();
|
||||
for (StateId s = 0; s < num_states; s++)
|
||||
if (state_map_[s] != s)
|
||||
num_removed++;
|
||||
KALDI_VLOG(3) << "Removing " << num_removed << " of "
|
||||
<< num_states << " states.";
|
||||
if (num_removed == 0) return; // Nothing to do.
|
||||
|
||||
clat_->SetStart(state_map_[clat_->Start()]);
|
||||
|
||||
for (StateId s = 0; s < num_states; s++) {
|
||||
if (state_map_[s] != s)
|
||||
continue; // There is no point modifying states we're removing.
|
||||
for (MutableArcIterator<CompactLattice> aiter(clat_, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
Arc arc = aiter.Value();
|
||||
StateId mapped_nextstate = state_map_[arc.nextstate];
|
||||
if (mapped_nextstate != arc.nextstate) {
|
||||
arc.nextstate = mapped_nextstate;
|
||||
aiter.SetValue(arc);
|
||||
}
|
||||
}
|
||||
}
|
||||
fst::Connect(clat_);
|
||||
}
|
||||
private:
|
||||
CompactLattice *clat_;
|
||||
float delta_;
|
||||
std::vector<HashType> state_hashes_;
|
||||
std::vector<StateId> state_map_; // maps each state to itself or to some
|
||||
// equivalent state. Within each equivalence
|
||||
// class, we pick one arbitrarily.
|
||||
};
|
||||
|
||||
bool MinimizeCompactLattice(CompactLattice *clat, float delta) {
|
||||
CompactLatticeMinimizer minimizer(clat, delta);
|
||||
return minimizer.Minimize();
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace kaldi
|
|
@ -0,0 +1,48 @@
|
|||
// lat/minimize-lattice.h
|
||||
|
||||
// Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#ifndef KALDI_LAT_MINIMIZE_LATTICE_H_
|
||||
#define KALDI_LAT_MINIMIZE_LATTICE_H_
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "fstext/fstext-lib.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "lat/kaldi-lattice.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
|
||||
/// This function minimizes the compact lattice. It is to be called after
/// determinization (see ../fstext/determinize-lattice-pruned.h) and pushing
/// (see ./push-lattice.h). If the lattice is not determinized and pushed this
/// function will not combine as many states as it could, but it won't crash.
/// Returns true on success, and false if it failed due to topological sorting
/// failing.
bool MinimizeCompactLattice(CompactLattice *clat, float delta = fst::kDelta);
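// Example usage (a minimal sketch; it assumes the pushing functions declared
// in ./push-lattice.h, and that "clat" came out of determinization):
//
// CompactLattice clat;
// // ... obtain clat, e.g. via DeterminizeLatticePruned() ...
// PushCompactLatticeStrings(&clat);
// PushCompactLatticeWeights(&clat);
// if (!MinimizeCompactLattice(&clat))
// KALDI_WARN << "Minimization failed (lattice could not be top-sorted).";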
|
||||
|
||||
|
||||
|
||||
} // namespace kaldi
|
||||
|
||||
#endif // KALDI_LAT_MINIMIZE_LATTICE_H_
|
|
@ -0,0 +1,119 @@
|
|||
// lat/push-lattice-test.cc
|
||||
|
||||
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "lat/kaldi-lattice.h"
|
||||
#include "lat/push-lattice.h"
|
||||
#include "fstext/rand-fst.h"
|
||||
|
||||
|
||||
namespace kaldi {
|
||||
using namespace fst;
|
||||
|
||||
CompactLattice *RandCompactLattice() {
|
||||
RandFstOptions opts;
|
||||
opts.acyclic = true;
|
||||
Lattice *fst = fst::RandPairFst<LatticeArc>(opts);
|
||||
CompactLattice *cfst = new CompactLattice;
|
||||
ConvertLattice(*fst, cfst);
|
||||
delete fst;
|
||||
return cfst;
|
||||
}
|
||||
|
||||
void TestPushCompactLatticeStrings() {
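// This test checks two things: that string-pushing preserves equivalence
// (via RandEquivalent), and that at every state other than the start state
// no further pushing is possible, i.e. the strings on the outgoing arcs and
// on the final weight do not all share a common nonempty first symbol.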
|
||||
CompactLattice *clat = RandCompactLattice();
|
||||
CompactLattice clat2(*clat);
|
||||
PushCompactLatticeStrings(&clat2);
|
||||
KALDI_ASSERT(fst::RandEquivalent(*clat, clat2, 5, 0.001, rand(), 10));
|
||||
for (CompactLatticeArc::StateId s = 0; s < clat2.NumStates(); s++) {
|
||||
if (s == 0)
|
||||
continue; // We don't check state zero, as the "leftover string" stays
|
||||
// there.
|
||||
int32 first_label = -1; // Only read after first_label_set is true;
// initialized to silence compiler warnings.
|
||||
bool ok = false;
|
||||
bool first_label_set = false;
|
||||
for (ArcIterator<CompactLattice> aiter(clat2, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
if (aiter.Value().weight.String().size() == 0) {
|
||||
ok = true;
|
||||
} else {
|
||||
int32 this_label = aiter.Value().weight.String().front();
|
||||
if (first_label_set) {
|
||||
if (this_label != first_label) ok = true;
|
||||
} else {
|
||||
first_label = this_label;
|
||||
first_label_set = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (clat2.Final(s) != CompactLatticeWeight::Zero()) {
|
||||
if (clat2.Final(s).String().size() == 0) ok = true;
|
||||
else {
|
||||
int32 this_label = clat2.Final(s).String().front();
|
||||
if (first_label_set && this_label != first_label) ok = true;
|
||||
}
|
||||
}
|
||||
KALDI_ASSERT(ok);
|
||||
}
|
||||
delete clat;
|
||||
}
|
||||
|
||||
void TestPushCompactLatticeWeights() {
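// This test checks that weight-pushing preserves equivalence, and that at
// every state other than the start state, the final weight plus the weights
// on the outgoing arcs "sum to one" (i.e. to LatticeWeight::One()) in the
// lattice semiring; any leftover weight stays on the start state.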
|
||||
CompactLattice *clat = RandCompactLattice();
|
||||
CompactLattice clat2(*clat);
|
||||
PushCompactLatticeWeights(&clat2);
|
||||
KALDI_ASSERT(fst::RandEquivalent(*clat, clat2, 5, 0.001, rand(), 10));
|
||||
for (CompactLatticeArc::StateId s = 0; s < clat2.NumStates(); s++) {
|
||||
if (s == 0)
|
||||
continue; // We don't check state zero, as the "leftover string" stays
|
||||
// there.
|
||||
LatticeWeight sum = clat2.Final(s).Weight();
|
||||
for (ArcIterator<CompactLattice> aiter(clat2, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
sum = Plus(sum, aiter.Value().weight.Weight());
|
||||
}
|
||||
if (!ApproxEqual(sum, LatticeWeight::One())) {
|
||||
{
|
||||
fst::FstPrinter<CompactLatticeArc> printer(clat2, NULL, NULL,
|
||||
NULL, true, true);
|
||||
printer.Print(&std::cerr, "<unknown>");
|
||||
}
|
||||
{
|
||||
fst::FstPrinter<CompactLatticeArc> printer(*clat, NULL, NULL,
|
||||
NULL, true, true);
|
||||
printer.Print(&std::cerr, "<unknown>");
|
||||
}
|
||||
KALDI_ERR << "Bad lattice being pushed.";
|
||||
}
|
||||
}
|
||||
delete clat;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // end namespace kaldi
|
||||
|
||||
int main() {
|
||||
using namespace kaldi;
|
||||
for (int32 i = 0; i < 15; i++) {
|
||||
TestPushCompactLatticeStrings();
|
||||
TestPushCompactLatticeWeights();
|
||||
}
|
||||
KALDI_LOG << "Success.";
|
||||
}
|
|
@ -202,12 +202,75 @@ class CompactLatticePusher {
|
|||
std::vector<int32> shift_vec_;
|
||||
};
|
||||
|
||||
bool PushCompactLattice(CompactLattice *clat) {
|
||||
bool PushCompactLatticeStrings(CompactLattice *clat) {
|
||||
CompactLatticePusher pusher(clat);
|
||||
return pusher.Push();
|
||||
}
|
||||
|
||||
|
||||
bool PushCompactLatticeWeights(CompactLattice *clat) {
|
||||
if (clat->Properties(fst::kTopSorted, true) == 0) {
|
||||
if (!TopSort(clat)) {
|
||||
KALDI_WARN << "Topological sorting of state-level lattice failed "
|
||||
"(probably your lexicon has empty words or your LM has epsilon cycles; "
"this is a bad idea).";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
typedef CompactLattice::StateId StateId; // Note: this is guaranteed to be
|
||||
// signed.
|
||||
typedef CompactLatticeArc Arc;
|
||||
typedef CompactLatticeWeight Weight;
|
||||
|
||||
StateId num_states = clat->NumStates();
|
||||
if (num_states == 0) {
|
||||
KALDI_WARN << "Pushing weights of empty compact lattice";
|
||||
return true; // this is technically success because an empty
|
||||
// lattice is already pushed.
|
||||
}
|
||||
std::vector<LatticeWeight> weight_to_end(num_states); // Note: LatticeWeight
|
||||
// contains two floats.
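// First pass (in reverse topological order): weight_to_end[s] becomes the
// semiring sum, over all paths from s to a final state, of the path weight
// times the final weight. The second pass below multiplies each arc weight
// by Divide(weight_to_end[nextstate], weight_to_end[s]) and divides each
// final weight by weight_to_end[s], after which the weights leaving each
// state other than the start state sum to One().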
|
||||
for (StateId s = num_states - 1; s >= 0; s--) {
|
||||
LatticeWeight this_weight_to_end = clat->Final(s).Weight();
|
||||
for (fst::ArcIterator<CompactLattice> aiter(*clat, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
const Arc &arc = aiter.Value();
|
||||
KALDI_ASSERT(arc.nextstate > s && "Cyclic lattices not allowed.");
|
||||
this_weight_to_end = Plus(this_weight_to_end,
|
||||
Times(aiter.Value().weight.Weight(),
|
||||
weight_to_end[arc.nextstate]));
|
||||
}
|
||||
if (this_weight_to_end == LatticeWeight::Zero()) {
|
||||
KALDI_WARN << "Lattice has non-coaccessible states.";
|
||||
}
|
||||
weight_to_end[s] = this_weight_to_end;
|
||||
}
|
||||
weight_to_end[0] = LatticeWeight::One(); // We leave the "leftover weight" on
|
||||
// the start state, which won't
|
||||
// necessarily end up summing to one.
|
||||
for (StateId s = 0; s < num_states; s++) {
|
||||
LatticeWeight this_weight_to_end = weight_to_end[s];
|
||||
if (this_weight_to_end == LatticeWeight::Zero())
|
||||
continue;
|
||||
for (fst::MutableArcIterator<CompactLattice> aiter(clat, s); !aiter.Done();
|
||||
aiter.Next()) {
|
||||
Arc arc = aiter.Value();
|
||||
LatticeWeight next_weight_to_end = weight_to_end[arc.nextstate];
|
||||
if (next_weight_to_end != LatticeWeight::Zero()) {
|
||||
arc.weight.SetWeight(Times(arc.weight.Weight(),
|
||||
Divide(next_weight_to_end,
|
||||
this_weight_to_end)));
|
||||
aiter.SetValue(arc);
|
||||
}
|
||||
}
|
||||
Weight final_weight = clat->Final(s);
|
||||
if (final_weight != Weight::Zero()) {
|
||||
final_weight.SetWeight(Divide(final_weight.Weight(), this_weight_to_end));
|
||||
clat->SetFinal(s, final_weight);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -41,7 +41,16 @@ namespace kaldi {
|
|||
/// It returns true on success, false if it failed due to TopSort failing,
|
||||
/// which should never happen, but we handle it gracefully by just leaving the
|
||||
/// lattice the same.
|
||||
bool PushCompactLattice(CompactLattice *clat);
|
||||
/// This function used to be called just PushCompactLattice.
|
||||
bool PushCompactLatticeStrings(CompactLattice *clat);
|
||||
|
||||
/// This function pushes the weights in the CompactLattice so that all states,
/// except possibly the start state, have Weight components (of type
/// LatticeWeight) that "sum to one" in the LatticeWeight semiring (i.e.
/// interpreting the weights as negated log-probs). It returns true on
/// success, false if it failed due to TopSort failing, which should never
/// happen, but we handle it gracefully by just leaving the lattice the same.
bool PushCompactLatticeWeights(CompactLattice *clat);
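// Example usage (a minimal sketch; the two pushing operations are typically
// applied together, before MinimizeCompactLattice() of ./minimize-lattice.h):
//
// CompactLattice clat;
// // ... obtain clat, e.g. from determinization ...
// if (!PushCompactLatticeStrings(&clat) || !PushCompactLatticeWeights(&clat))
// KALDI_WARN << "Pushing failed (lattice could not be top-sorted).";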
|
||||
|
||||
} // namespace kaldi
|
||||
|
||||
|
|
|
@ -16,7 +16,8 @@ BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-to-nbest \
|
|||
lattice-to-ctm-conf lattice-arcgraph lattice-combine lattice-reverse \
|
||||
lattice-rescore-mapped lattice-depth lattice-align-phones \
|
||||
lattice-to-smbr-post lattice-determinize-pruned-parallel \
|
||||
lattice-add-penalty lattice-align-words-lexicon lattice-push
|
||||
lattice-add-penalty lattice-align-words-lexicon lattice-push \
|
||||
lattice-minimize
|
||||
|
||||
|
||||
OBJFILES =
|
||||
|
|
|
@ -82,8 +82,8 @@ int main(int argc, char *argv[]) {
|
|||
num_done++;
|
||||
}
|
||||
KALDI_LOG << "Done " << num_done << " lattices.";
|
||||
KALDI_LOG << "The average density is "
|
||||
<< static_cast<float> ((float)sum_depth / total_t);
|
||||
KALDI_LOG << "Overall density is "
|
||||
<< (static_cast<BaseFloat>(sum_depth) / total_t);
|
||||
if (num_done != 0) return 0;
|
||||
else return 1;
|
||||
} catch (const std::exception &e) {
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include "lat/kaldi-lattice.h"
|
||||
#include "fstext/determinize-lattice-pruned.h"
|
||||
#include "lat/lattice-functions.h"
|
||||
#include "lat/push-lattice.h"
|
||||
#include "lat/minimize-lattice.h"
|
||||
#include "thread/kaldi-task-sequence.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
@ -33,11 +35,13 @@ class DeterminizeLatticeTask {
|
|||
std::string key,
|
||||
BaseFloat acoustic_scale,
|
||||
BaseFloat beam,
|
||||
bool minimize,
|
||||
Lattice *lat,
|
||||
CompactLatticeWriter *clat_writer,
|
||||
int32 *num_warn):
|
||||
opts_(opts), key_(key), acoustic_scale_(acoustic_scale), beam_(beam),
|
||||
lat_(lat), clat_writer_(clat_writer), num_warn_(num_warn) { }
|
||||
minimize_(minimize), lat_(lat), clat_writer_(clat_writer),
|
||||
num_warn_(num_warn) { }
|
||||
|
||||
void operator () () {
|
||||
Invert(lat_); // to get word labels on the input side.
|
||||
|
@ -58,6 +62,11 @@ class DeterminizeLatticeTask {
|
|||
}
|
||||
delete lat_; // The lattice is no longer needed, so free it now.
|
||||
lat_ = NULL;
|
||||
if (minimize_) {
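// Pushing the strings and then the weights before minimizing lets
// MinimizeCompactLattice() combine as many states as possible: it expects
// determinized, pushed input (see lat/minimize-lattice.h).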
|
||||
PushCompactLatticeStrings(&det_clat_);
|
||||
PushCompactLatticeWeights(&det_clat_);
|
||||
MinimizeCompactLattice(&det_clat_);
|
||||
}
|
||||
// Invert the original acoustic scaling
|
||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale_),
|
||||
&det_clat_);
|
||||
|
@ -72,6 +81,7 @@ class DeterminizeLatticeTask {
|
|||
std::string key_;
|
||||
BaseFloat acoustic_scale_;
|
||||
BaseFloat beam_;
|
||||
bool minimize_;
|
||||
Lattice *lat_; // The lattice we're working on. Owned locally.
|
||||
CompactLattice det_clat_; // The output of our process. Will be written
|
||||
// to clat_writer_ in the destructor.
|
||||
|
@ -101,6 +111,7 @@ int main(int argc, char *argv[]) {
|
|||
ParseOptions po(usage);
|
||||
BaseFloat acoustic_scale = 1.0;
|
||||
BaseFloat beam = 10.0;
|
||||
bool minimize = false;
|
||||
TaskSequencerConfig sequencer_config; // has --num-threads option
|
||||
fst::DeterminizeLatticePrunedOptions determinize_config; // Options used in DeterminizeLatticePruned--
|
||||
// this options class does not have its own Register function as it's viewed as
|
||||
|
@ -108,8 +119,11 @@ int main(int argc, char *argv[]) {
|
|||
determinize_config.max_mem = 50000000;
|
||||
determinize_config.max_loop = 0; // was 500000;
|
||||
|
||||
po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
|
||||
po.Register("acoustic-scale", &acoustic_scale,
|
||||
"Scaling factor for acoustic likelihoods");
|
||||
po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling].");
|
||||
po.Register("minimize", &minimize,
|
||||
"If true, push and minimize after determinization");
|
||||
determinize_config.Register(&po);
|
||||
sequencer_config.Register(&po);
|
||||
po.Read(argc, argv);
|
||||
|
@ -142,7 +156,7 @@ int main(int argc, char *argv[]) {
|
|||
Lattice *lat = lat_reader.Value().Copy(); // will give ownership to "task"
|
||||
// below
|
||||
DeterminizeLatticeTask *task = new DeterminizeLatticeTask(
|
||||
determinize_config, key, acoustic_scale, beam,
|
||||
determinize_config, key, acoustic_scale, beam, minimize,
|
||||
lat, &compact_lat_writer, &n_warn);
|
||||
sequencer.Run(task);
|
||||
n_done++;
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include "lat/kaldi-lattice.h"
|
||||
#include "fstext/determinize-lattice-pruned.h"
|
||||
#include "lat/lattice-functions.h"
|
||||
#include "lat/push-lattice.h"
|
||||
#include "lat/minimize-lattice.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
|
@ -39,14 +41,18 @@ int main(int argc, char *argv[]) {
|
|||
ParseOptions po(usage);
|
||||
BaseFloat acoustic_scale = 1.0;
|
||||
BaseFloat beam = 10.0;
|
||||
bool minimize = false;
|
||||
fst::DeterminizeLatticePrunedOptions opts; // Options used in DeterminizeLatticePruned--
|
||||
// this options class does not have its own Register function as it's viewed as
|
||||
// being more part of "fst world", so we register its elements independently.
|
||||
opts.max_mem = 50000000;
|
||||
opts.max_loop = 0; // was 500000;
|
||||
|
||||
po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
|
||||
po.Register("acoustic-scale", &acoustic_scale,
|
||||
"Scaling factor for acoustic likelihoods");
|
||||
po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling].");
|
||||
po.Register("minimize", &minimize,
|
||||
"If true, push and minimize after determinization");
|
||||
opts.Register(&po);
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
@ -89,6 +95,11 @@ int main(int argc, char *argv[]) {
|
|||
"(partial output will be pruned tighter than the specified beam.)";
|
||||
n_warn++;
|
||||
}
|
||||
if (minimize) {
|
||||
PushCompactLatticeStrings(&det_clat);
|
||||
PushCompactLatticeWeights(&det_clat);
|
||||
MinimizeCompactLattice(&det_clat);
|
||||
}
|
||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale), &det_clat);
|
||||
compact_lat_writer.Write(key, det_clat);
|
||||
n_done++;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// latbin/lattice-determinize.cc
|
||||
|
||||
// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
||||
// Copyright 2009-2012 Microsoft Corporation
|
||||
// 2012-2013 Johns Hopkins University (Author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -23,6 +24,8 @@
|
|||
#include "fstext/fstext-lib.h"
|
||||
#include "lat/kaldi-lattice.h"
|
||||
#include "lat/lattice-functions.h"
|
||||
#include "lat/push-lattice.h"
|
||||
#include "lat/minimize-lattice.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
|
@ -111,16 +114,27 @@ int main(int argc, char *argv[]) {
|
|||
int32 max_loop = 500000;
|
||||
BaseFloat delta = fst::kDelta;
|
||||
bool prune = false;
|
||||
bool minimize = false;
|
||||
|
||||
po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
|
||||
po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling]-- also used "
|
||||
"to handle determinization failures, set --prune=false to disable routine pruning");
|
||||
po.Register("acoustic-scale", &acoustic_scale,
|
||||
"Scaling factor for acoustic likelihoods");
|
||||
po.Register("beam", &beam,
|
||||
"Pruning beam [applied after acoustic scaling]-- also used "
|
||||
"to handle determinization failures, set --prune=false to "
|
||||
"disable routine pruning");
|
||||
po.Register("delta", &delta, "Tolerance used in determinization");
|
||||
po.Register("prune", &prune, "If true, prune determinized lattices with the --beam option.");
|
||||
po.Register("max-mem", &max_mem, "Maximum approximate memory usage in determinization (real usage might be many times this)");
|
||||
po.Register("max-loop", &max_loop, "Option to detect a certain type of failure in lattice determinization (not critical)");
|
||||
po.Register("beam-ratio", &beam_ratio, "Ratio by which to decrease beam if we reach the max-arcs.");
|
||||
po.Register("num-loops", &num_loops, "Number of times to decrease beam by beam-ratio if determinization fails.");
|
||||
po.Register("prune", &prune, "If true, prune determinized lattices "
|
||||
"with the --beam option.");
|
||||
po.Register("max-mem", &max_mem, "Maximum approximate memory usage in "
|
||||
"determinization (real usage might be many times this)");
|
||||
po.Register("max-loop", &max_loop, "Option to detect a certain "
|
||||
"type of failure in lattice determinization (not critical)");
|
||||
po.Register("beam-ratio", &beam_ratio, "Ratio by which to "
|
||||
"decrease beam if we reach the max-arcs.");
|
||||
po.Register("num-loops", &num_loops, "Number of times to "
|
||||
"decrease beam by beam-ratio if determinization fails.");
|
||||
po.Register("minimize", &minimize,
|
||||
"If true, push and minimize after determinization");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
@ -158,6 +172,11 @@ int main(int argc, char *argv[]) {
|
|||
if (DeterminizeLatticeWrapper(lat, key, prune,
|
||||
beam, beam_ratio, max_mem, max_loop,
|
||||
delta, num_loops, &clat)) {
|
||||
if (minimize) {
|
||||
PushCompactLatticeStrings(&clat);
|
||||
PushCompactLatticeWeights(&clat);
|
||||
MinimizeCompactLattice(&clat);
|
||||
}
|
||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale), &clat);
|
||||
compact_lattice_writer.Write(key, clat);
|
||||
n_done++;
|
||||
|
|
|
@ -0,0 +1,110 @@
|
|||
// latbin/lattice-minimize.cc
|
||||
|
||||
// Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "fstext/fstext-lib.h"
|
||||
#include "lat/kaldi-lattice.h"
|
||||
#include "lat/minimize-lattice.h"
|
||||
#include "lat/push-lattice.h"
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
typedef kaldi::int32 int32;
|
||||
typedef kaldi::int64 int64;
|
||||
using fst::SymbolTable;
|
||||
using fst::VectorFst;
|
||||
using fst::StdArc;
|
||||
|
||||
const char *usage =
|
||||
"Minimize lattices, in CompactLattice format. Should be applied to\n"
"determinized lattices (e.g. produced with --determinize-lattice=true).\n"
"Note: by default this program pushes the strings and weights prior to\n"
"minimization.\n"
|
||||
"Usage: lattice-minimize [options] lattice-rspecifier lattice-wspecifier\n"
|
||||
" e.g.: lattice-minimize ark:1.lats ark:2.lats\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
bool push_strings = true;
|
||||
bool push_weights = true;
|
||||
|
||||
po.Register("push-strings", &push_strings, "If true, push the strings in the "
|
||||
"lattice to the start.");
|
||||
po.Register("push-weights", &push_weights, "If true, push the weights in the "
|
||||
"lattice to the start.");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 2) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string lats_rspecifier = po.GetArg(1),
|
||||
lats_wspecifier = po.GetArg(2);
|
||||
|
||||
|
||||
SequentialCompactLatticeReader clat_reader(lats_rspecifier);
|
||||
CompactLatticeWriter clat_writer(lats_wspecifier);
|
||||
|
||||
int32 n_done = 0, n_err = 0;
|
||||
|
||||
|
||||
for (; !clat_reader.Done(); clat_reader.Next()) {
|
||||
std::string key = clat_reader.Key();
|
||||
CompactLattice clat = clat_reader.Value();
|
||||
KALDI_VLOG(1) << "Processing lattice for utterance " << key;
|
||||
if (push_strings && !PushCompactLatticeStrings(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice strings (bad lattice?), "
|
||||
<< "for key " << key;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
if (push_weights && !PushCompactLatticeWeights(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice weights (bad lattice?),"
|
||||
<< "for key " << key ;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
if (!MinimizeCompactLattice(&clat)) {
|
||||
KALDI_WARN << "Failure in minimizing lattice (bad lattice?),"
|
||||
<< "for key " << key ;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
if (clat.NumStates() == 0) {
|
||||
KALDI_WARN << "Empty lattice for key " << key;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
clat_writer.Write(key, clat);
|
||||
n_done++;
|
||||
}
|
||||
KALDI_LOG << "Minimized " << n_done << " lattices, errors on " << n_err;
|
||||
return (n_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
// latbin/lattice-oracle.cc
|
||||
|
||||
// Copyright 2011 Gilles Boulianne
|
||||
// 2013 Johns Hopkins University (author: Daniel Povey)
|
||||
//
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -95,10 +96,10 @@ void CreateEditDistance(const fst::StdVectorFst &fst1,
|
|||
typedef StdArc StdArc;
|
||||
typedef StdArc::Weight Weight;
|
||||
typedef StdArc::Label Label;
|
||||
Weight corrCost(0.0);
|
||||
Weight subsCost(1.0);
|
||||
Weight insCost(1.0);
|
||||
Weight delCost(1.0);
|
||||
Weight correct_cost(0.0);
|
||||
Weight substitution_cost(1.0);
|
||||
Weight insertion_cost(1.0);
|
||||
Weight deletion_cost(1.0);
|
||||
|
||||
// create set of output symbols in fst1
|
||||
std::vector<Label> fst1syms, fst2syms;
|
||||
|
@ -108,17 +109,17 @@ void CreateEditDistance(const fst::StdVectorFst &fst1,
|
|||
pfst->AddState();
|
||||
pfst->SetStart(0);
|
||||
for (size_t i = 0; i < fst1syms.size(); i++)
|
||||
pfst->AddArc(0, StdArc(fst1syms[i], 0, delCost, 0)); // deletions
|
||||
pfst->AddArc(0, StdArc(fst1syms[i], 0, deletion_cost, 0)); // deletions
|
||||
|
||||
for (size_t i = 0; i < fst2syms.size(); i++)
|
||||
pfst->AddArc(0, StdArc(0, fst2syms[i], insCost, 0)); // insertions
|
||||
pfst->AddArc(0, StdArc(0, fst2syms[i], insertion_cost, 0)); // insertions
|
||||
|
||||
// stupid implementation O(N^2)
|
||||
for (size_t i = 0; i < fst1syms.size(); i++) {
|
||||
Label label1 = fst1syms[i];
|
||||
for (size_t j = 0; j < fst2syms.size(); j++) {
|
||||
Label label2 = fst2syms[j];
|
||||
Weight cost( label1 == label2 ? corrCost : subsCost);
|
||||
Weight cost( label1 == label2 ? correct_cost : substitution_cost);
|
||||
pfst->AddArc(0, StdArc(label1, label2, cost, 0)); // substitutions
|
||||
}
|
||||
}
|
||||
|
@ -127,30 +128,33 @@ void CreateEditDistance(const fst::StdVectorFst &fst1,
|
|||
}
|
||||
|
||||
void CountErrors(fst::StdVectorFst &fst,
|
||||
unsigned int *corr,
|
||||
unsigned int *subs,
|
||||
unsigned int *ins,
|
||||
unsigned int *del,
|
||||
unsigned int *totwords) {
|
||||
int32 *correct,
|
||||
int32 *substitutions,
|
||||
int32 *insertions,
|
||||
int32 *deletions,
|
||||
int32 *num_words) {
|
||||
typedef fst::StdArc::StateId StateId;
|
||||
typedef fst::StdArc::Weight Weight;
|
||||
*corr = *subs = *ins = *del = *totwords = 0;
|
||||
*correct = *substitutions = *insertions = *deletions = *num_words = 0;
|
||||
|
||||
// go through the first complete path in fst (there should be only one)
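// (In the composed FST the ilabels are hypothesis words and the olabels are
// reference words, so an epsilon ilabel paired with a nonzero olabel is a
// deletion, and a nonzero ilabel paired with an epsilon olabel is an
// insertion; num_words counts reference words.)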
|
||||
StateId src = fst.Start();
|
||||
while (fst.Final(src)== Weight::Zero()) { // while not final
|
||||
for (fst::ArcIterator<fst::StdVectorFst> aiter(fst, src); !aiter.Done(); aiter.Next()) {
|
||||
fst::StdArc arc = aiter.Value();
|
||||
if (arc.ilabel == 0 && arc.olabel == 0) {
|
||||
// don't count these so we may compare number of arcs and number of errors
|
||||
} else if (arc.ilabel == arc.olabel) {
|
||||
(*corr)++; (*totwords)++;
|
||||
} else if (arc.ilabel == 0) {
|
||||
(*ins)++;
|
||||
} else if (arc.olabel == 0) {
|
||||
(*del)++; (*totwords)++;
|
||||
if (arc.ilabel == arc.olabel && arc.ilabel != 0) {
|
||||
(*correct)++;
|
||||
(*num_words)++;
|
||||
} else if (arc.ilabel == 0 && arc.olabel != 0) {
|
||||
(*deletions)++;
|
||||
(*num_words)++;
|
||||
} else if (arc.ilabel != 0 && arc.olabel == 0) {
|
||||
(*insertions)++;
|
||||
} else if (arc.ilabel != 0 && arc.olabel != 0) {
|
||||
(*substitutions)++;
|
||||
(*num_words)++;
|
||||
} else {
|
||||
(*subs)++; (*totwords)++;
|
||||
KALDI_ASSERT(arc.ilabel == 0 && arc.olabel == 0);
|
||||
}
|
||||
src = arc.nextstate;
|
||||
continue; // jump to next state
|
||||
|
@ -175,7 +179,7 @@ bool CheckFst(fst::StdVectorFst &fst, string name, string key) {
|
|||
|
||||
// Guoguo Chen added the implementation for option "write-lattices". This
|
||||
// function does a depth first search on the lattice and remove the arcs that
|
||||
// don't correspond to the oracle path. By "remove" I actually point the next
|
||||
// don't correspond to the oracle path. By "remove" I actually point the next
|
||||
// state of the arc to some state that is not in the lattice and then use the
|
||||
// openfst connect function. This makes things much easier.
|
||||
bool GetOracleLattice(Lattice *oracle_lat,
|
||||
|
@ -229,8 +233,9 @@ int main(int argc, char *argv[]) {
|
|||
const char *usage =
|
||||
"Finds the path having the smallest edit-distance between two lattices.\n"
|
||||
"For efficiency put the smallest lattices first (for example reference strings).\n"
|
||||
"Usage: lattice-oracle [options] test-lattice-rspecifier reference-rspecifier transcriptions-wspecifier\n"
|
||||
" e.g.: lattice-oracle ark:ref.lats ark:1.tra ark:2.tra\n";
|
||||
"Usage: lattice-oracle [options] test-lattice-rspecifier reference-rspecifier "
|
||||
"transcriptions-wspecifier\n"
|
||||
" e.g.: lattice-oracle ark:lat.1 'ark:sym2int.pl -f 2- data/lang/words.txt <data/test/text' ark,t:-\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
|
@ -238,27 +243,28 @@ int main(int argc, char *argv[]) {
|
|||
std::string wild_syms_filename;
|
||||
|
||||
std::string lats_wspecifier;
|
||||
|
||||
po.Register("word-symbol-table", &word_syms_filename, "Symbol table for words [for debug output]");
|
||||
po.Register("wildcard-symbols-list", &wild_syms_filename, "List of symbols that don't count as errors");
|
||||
po.Register("write-lattices", &lats_wspecifier, "If supplied, write 1-best path as lattices to this wspecifier");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() < 3 || po.NumArgs() > 4) {
|
||||
if (po.NumArgs() != 3) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string lats_rspecifier = po.GetArg(1),
|
||||
reference_rspecifier = po.GetArg(2),
|
||||
transcriptions_wspecifier = po.GetOptArg(3);
|
||||
transcriptions_wspecifier = po.GetArg(3);
|
||||
|
||||
// will read input as lattices
|
||||
SequentialLatticeReader lattice_reader(lats_rspecifier);
|
||||
RandomAccessInt32VectorReader reference_reader(reference_rspecifier);
|
||||
|
||||
Int32VectorWriter transcriptions_writer(transcriptions_wspecifier);
|
||||
|
||||
|
||||
// Guoguo Chen added the implementation for option "write-lattices".
|
||||
CompactLatticeWriter lats_writer(lats_wspecifier);
|
||||
|
||||
|
@ -276,7 +282,8 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
|
||||
int32 n_done = 0, n_fail = 0;
|
||||
unsigned int tot_corr=0, tot_subs=0, tot_ins=0, tot_del=0, tot_words=0;
|
||||
int32 tot_correct = 0, tot_substitutions = 0, tot_insertions = 0,
tot_deletions = 0, tot_words = 0;
|
||||
|
||||
for (; !lattice_reader.Done(); lattice_reader.Next()) {
|
||||
std::string key = lattice_reader.Key();
|
||||
|
@ -284,9 +291,9 @@ int main(int argc, char *argv[]) {
|
|||
cerr << "Lattice "<<key<<" read."<<endl;
|
||||
|
||||
// remove all weights while creating a standard FST
|
||||
VectorFst<StdArc> fst1;
|
||||
ConvertLatticeToUnweightedAcceptor(lat, wild_syms, &fst1);
|
||||
CheckFst(fst1, "fst1_", key);
|
||||
VectorFst<StdArc> lattice_fst;
|
||||
ConvertLatticeToUnweightedAcceptor(lat, wild_syms, &lattice_fst);
|
||||
CheckFst(lattice_fst, "lattice_fst_", key);
|
||||
|
||||
// TODO: map certain symbols (using an FST created with CreateMapFst())
|
||||
|
||||
|
@ -296,44 +303,49 @@ int main(int argc, char *argv[]) {
|
|||
continue;
|
||||
}
|
||||
const std::vector<int32> &reference = reference_reader.Value(key);
|
||||
VectorFst<StdArc> fst2;
|
||||
MakeLinearAcceptor(reference, &fst2);
|
||||
VectorFst<StdArc> reference_fst;
|
||||
MakeLinearAcceptor(reference, &reference_fst);
|
||||
|
||||
CheckFst(fst2, "fst2_", key);
|
||||
CheckFst(reference_fst, "reference_fst_", key);
|
||||
|
||||
// recreate edit distance fst if necessary
|
||||
fst::StdVectorFst editDistanceFst;
|
||||
CreateEditDistance(fst1, fst2, &editDistanceFst);
|
||||
fst::StdVectorFst edit_distance_fst;
|
||||
CreateEditDistance(lattice_fst, reference_fst, &edit_distance_fst);
|
||||
|
||||
// compose with edit distance transducer
|
||||
VectorFst<StdArc> composedFst;
|
||||
fst::Compose(editDistanceFst, fst2, &composedFst);
|
||||
CheckFst(composedFst, "composed_", key);
|
||||
VectorFst<StdArc> edit_ref_fst;
|
||||
fst::Compose(edit_distance_fst, reference_fst, &edit_ref_fst);
|
||||
CheckFst(edit_ref_fst, "composed_", key);
|
||||
|
||||
// make sure composed FST is input sorted
|
||||
fst::ArcSort(&composedFst, fst::StdILabelCompare());
|
||||
fst::ArcSort(&edit_ref_fst, fst::StdILabelCompare());
|
||||
|
||||
// compose with previous result
|
||||
VectorFst<StdArc> resultFst;
|
||||
fst::Compose(fst1, composedFst, &resultFst);
|
||||
CheckFst(resultFst, "result_", key);
|
||||
VectorFst<StdArc> result_fst;
|
||||
fst::Compose(lattice_fst, edit_ref_fst, &result_fst);
|
||||
CheckFst(result_fst, "result_", key);
|
||||
|
||||
// find out best path
|
||||
VectorFst<StdArc> best_path;
|
||||
fst::ShortestPath(resultFst, &best_path);
|
||||
fst::ShortestPath(result_fst, &best_path);
|
||||
CheckFst(best_path, "best_path_", key);
|
||||
|
||||
if (best_path.Start() == fst::kNoStateId) {
|
||||
KALDI_WARN << "Best-path failed for key " << key;
|
||||
n_fail++;
|
||||
} else {
|
||||
|
||||
// count errors
|
||||
unsigned int corr, subs, ins, del, totwords;
|
||||
CountErrors(best_path, &corr, &subs, &ins, &del, &totwords);
|
||||
unsigned int toterrs = subs+ins+del;
|
||||
KALDI_LOG << "%WER "<<(100.*toterrs)/totwords<<" [ "<<toterrs<<" / "<<totwords<<", "<<ins<<" ins, "<<del<<" del, "<<subs<<" sub ]";
|
||||
tot_corr += corr; tot_subs += subs; tot_ins += ins; tot_del += del; tot_words += totwords;
|
||||
int32 correct, substitutions, insertions, deletions, num_words;
|
||||
CountErrors(best_path, &correct, &substitutions, &insertions, &deletions, &num_words);
|
||||
int32 toterrs = substitutions + insertions + deletions;
|
||||
KALDI_LOG << "%WER " << (100.*toterrs) / num_words << " [ " << toterrs
|
||||
<< " / " << num_words << ", " << insertions << " insertions, " << deletions
|
||||
<< " deletions, " << substitutions << " sub ]";
|
||||
tot_correct += correct;
|
||||
tot_substitutions += substitutions;
|
||||
tot_insertions += insertions;
|
||||
tot_deletions += deletions;
|
||||
tot_words += num_words;
|
||||
|
||||
std::vector<int32> oracle_words;
|
||||
std::vector<int32> reference_words;
|
||||
|
@ -354,7 +366,8 @@ int main(int argc, char *argv[]) {
|
|||
for (size_t i = 0; i < reference_words.size(); i++) {
|
||||
std::string s = word_syms->Find(reference_words[i]);
|
||||
if (s == "")
|
||||
KALDI_ERR << "Word-id " << reference_words[i] <<" not in symbol table.";
|
||||
KALDI_ERR << "Word-id " << reference_words[i]
|
||||
<< " not in symbol table.";
|
||||
std::cerr << s << ' ';
|
||||
}
|
||||
std::cerr << '\n';
|
||||
|
@ -367,8 +380,10 @@ int main(int argc, char *argv[]) {
|
|||
if (lats_wspecifier != "") {
|
||||
Lattice oracle_lat = lat;
|
||||
LatticeArc::StateId bad_state = oracle_lat.AddState();
|
||||
if (!GetOracleLattice(&oracle_lat, oracle_words, bad_state, oracle_lat.Start(), 0))
|
||||
KALDI_WARN << "Fail to find the oracle path in the original lattice: " << key;
|
||||
if (!GetOracleLattice(&oracle_lat, oracle_words,
|
||||
bad_state, oracle_lat.Start(), 0))
|
||||
KALDI_WARN << "Failed to find the oracle path in the original "
|
||||
<< "lattice: " << key;
|
||||
CompactLattice oracle_clat;
|
||||
ConvertLattice(oracle_lat, &oracle_clat);
|
||||
lats_writer.Write(key, oracle_clat);
|
||||
|
@ -377,9 +392,13 @@ int main(int argc, char *argv[]) {
|
|||
n_done++;
|
||||
}
|
||||
if (word_syms) delete word_syms;
|
||||
unsigned int tot_errs = tot_subs + tot_del + tot_ins;
|
||||
KALDI_LOG << "Overall %WER "<<(100.*tot_errs)/tot_words<<" [ "<<tot_errs<<" / "<<tot_words<<", "<<tot_ins<<" ins, "<<tot_del<<" del, "<<tot_subs<<" sub ]";
|
||||
KALDI_LOG << "Scored " << n_done << " lattices, "<<n_fail<<" not present in hyp.";
|
||||
int32 tot_errs = tot_substitutions + tot_deletions + tot_insertions;
|
||||
KALDI_LOG << "Overall %WER " << (100.*tot_errs)/tot_words << " [ "
|
||||
<< tot_errs << " / " << tot_words << ", " << tot_insertions
|
||||
<< " insertions, " << tot_deletions << " deletions, "
|
||||
<< tot_substitutions << " substitutions ]";
|
||||
KALDI_LOG << "Scored " << n_done << " lattices, " << n_fail
|
||||
<< " not present in ref.";
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
|
|
|
@ -37,13 +37,22 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
const char *usage =
|
||||
"Push lattices, in CompactLattice format, so that the strings are as\n"
|
||||
"close to the start as possible. Does not affect the weights. This can\n"
|
||||
"be helpful prior to word-alignment.\n"
|
||||
"close to the start as possible, and the lowest cost weight for each\n"
|
||||
"state except the start state is (0, 0). This can be helpful prior to\n"
|
||||
"word-alignment (in this case, only strings need to be pushed)\n"
|
||||
"\n"
|
||||
"Usage: lattice-push [options] lattice-rspecifier lattice-wspecifier\n"
|
||||
" e.g.: lattice-push ark:1.lats ark:2.lats\n";
|
||||
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
bool push_strings = true;
|
||||
bool push_weights = true;
|
||||
|
||||
po.Register("push-strings", &push_strings, "If true, push the strings in the "
|
||||
"lattice to the start.");
|
||||
po.Register("push-weights", &push_weights, "If true, push the weights in the "
|
||||
"lattice to the start.");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
@ -66,18 +75,25 @@ int main(int argc, char *argv[]) {
|
|||
std::string key = clat_reader.Key();
|
||||
CompactLattice clat = clat_reader.Value();
|
||||
KALDI_VLOG(1) << "Processing lattice for utterance " << key;
|
||||
if (!PushCompactLattice(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice (bad lattice?) for key " << key;
|
||||
if (push_strings && !PushCompactLatticeStrings(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice strings (bad lattice?), "
|
||||
<< "for key " << key;
|
||||
n_err++;
|
||||
} else {
|
||||
if (clat.NumStates() == 0) {
|
||||
KALDI_WARN << "Empty lattice for key " << key;
|
||||
n_err++;
|
||||
} else {
|
||||
clat_writer.Write(key, clat);
|
||||
n_done++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (push_weights && !PushCompactLatticeWeights(&clat)) {
|
||||
KALDI_WARN << "Failure in pushing lattice weights (bad lattice?),"
|
||||
<< "for key " << key ;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
if (clat.NumStates() == 0) {
|
||||
KALDI_WARN << "Empty lattice for key " << key;
|
||||
n_err++;
|
||||
continue;
|
||||
}
|
||||
clat_writer.Write(key, clat);
|
||||
n_done++;
|
||||
}
|
||||
KALDI_LOG << "Pushed " << n_done << " lattices, errors on " << n_err;
|
||||
return (n_done != 0 ? 0 : 1);
|
||||
|
|
|
@ -44,10 +44,11 @@ void CompressedMatrix::CopyFromMat(
|
|||
// we need to ensure that the percentile_0 through percentile_100
|
||||
// are in strictly increasing order.
|
||||
float min_value = mat.Min(), max_value = mat.Max();
|
||||
float safety_margin = 0.001 * (fabs(min_value) + fabs(max_value));
|
||||
if (safety_margin == 0.0) safety_margin = 1.0;
|
||||
min_value -= safety_margin;
|
||||
max_value += safety_margin;
|
||||
if (max_value == min_value)
|
||||
max_value = min_value + (1.0 + fabs(min_value)); // ensure it's strictly
|
||||
// greater than min_value,
|
||||
// even if matrix is
|
||||
// constant.
|
||||
|
||||
global_header.min_value = min_value;
|
||||
global_header.range = max_value - min_value;
|
||||
|
@ -125,17 +126,25 @@ void CompressedMatrix::ComputeColHeader(
|
|||
|
||||
if (num_rows >= 5) {
|
||||
int quarter_nr = num_rows/4;
|
||||
// The elements at positions 0, quarter_nr,
|
||||
// std::sort(sdata.begin(), sdata.end());
|
||||
// The elements at positions 0, quarter_nr,
|
||||
// 3*quarter_nr, and num_rows-1 need to be in sorted order.
|
||||
// Note: the + 1's below are not necessary but may speed things
|
||||
// up slightly.
|
||||
std::nth_element(sdata.begin(), sdata.begin() + quarter_nr, sdata.end());
|
||||
// Now, sdata.begin() + quarter_nr contains the element that would appear
|
||||
// in sorted order, in that position.
|
||||
std::nth_element(sdata.begin(), sdata.begin(), sdata.begin() + quarter_nr);
|
||||
// Now, sdata.begin() and sdata.begin() + quarter_nr contain the elements
|
||||
// that would appear at those positions in sorted order.
|
||||
std::nth_element(sdata.begin() + quarter_nr + 1,
|
||||
sdata.begin() + (3*quarter_nr) + 1, sdata.end());
|
||||
std::nth_element(sdata.begin() + (3*quarter_nr), sdata.end() - 1,
|
||||
sdata.begin() + (3*quarter_nr), sdata.end());
|
||||
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
|
||||
// 3*quarter_nr, contain the elements that would appear at those positions
|
||||
// in sorted order.
|
||||
std::nth_element(sdata.begin() + (3*quarter_nr) + 1, sdata.end() - 1,
|
||||
sdata.end());
|
||||
// Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
|
||||
// 3*quarter_nr, and sdata.end() - 1, contain the elements that would appear
|
||||
// at those positions in sorted order.
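// Using nth_element this way costs O(num_rows) on average per call, versus
// the O(num_rows * log(num_rows)) of the full std::sort that it replaces
// (commented out above).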
|
||||
|
||||
header->percentile_0 = FloatToUint16(global_header, sdata[0]);
|
||||
header->percentile_25 = std::max<uint16>(
|
||||
|
@ -228,7 +237,7 @@ void CompressedMatrix::CompressColumn(
|
|||
unsigned char *byte_data) {
|
||||
ComputeColHeader(global_header, data, stride,
|
||||
num_rows, header);
|
||||
|
||||
|
||||
float p0 = Uint16ToFloat(global_header, header->percentile_0),
|
||||
p25 = Uint16ToFloat(global_header, header->percentile_25),
|
||||
p75 = Uint16ToFloat(global_header, header->percentile_75),
|
||||
|
@ -248,6 +257,9 @@ void* CompressedMatrix::AllocateData(int32 num_bytes) {
|
|||
return reinterpret_cast<void*>(new float[(num_bytes/3) + 4]);
|
||||
}
|
||||
|
||||
#define DEBUG_COMPRESSED_MATRIX 0 // Must be zero for Kaldi to work; use 1 only
|
||||
// for debugging.
|
||||
|
||||
void CompressedMatrix::Write(std::ostream &os, bool binary) const {
|
||||
if (binary) { // Binary-mode write:
|
||||
WriteToken(os, binary, "CM");
|
||||
|
@ -264,12 +276,13 @@ void CompressedMatrix::Write(std::ostream &os, bool binary) const {
|
|||
}
|
||||
} else {
|
||||
// In text mode, just use the same format as a regular matrix.
|
||||
// This is not compressed.
|
||||
#if DEBUG_COMPRESSED_MATRIX == 0
|
||||
Matrix<BaseFloat> temp_mat(this->NumRows(), this->NumCols(),
|
||||
kUndefined);
|
||||
this->CopyToMat(&temp_mat);
|
||||
temp_mat.Write(os, binary);
|
||||
|
||||
/*
|
||||
#else
|
||||
// Text-mode writing. Only really useful for debug, but we'll implement it.
|
||||
if (data_ == NULL) {
|
||||
os << 0.0 << ' ' << 0.0 << ' ' << 0 << ' ' << 0 << '\n';
|
||||
|
@ -288,7 +301,9 @@ void CompressedMatrix::Write(std::ostream &os, bool binary) const {
|
|||
for (int32 j = 0; j < h.num_rows; j++, c++)
|
||||
os << static_cast<int>(*c) << ' ';
|
||||
os << '\n';
|
||||
} */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (os.fail())
|
||||
KALDI_ERR << "Error writing compressed matrix to stream.";
|
||||
|
@ -316,11 +331,12 @@ void CompressedMatrix::Read(std::istream &is, bool binary) {
|
|||
is.read(reinterpret_cast<char*>(data_) + sizeof(GlobalHeader),
|
||||
remaining_size);
|
||||
} else { // Text-mode read.
|
||||
#if DEBUG_COMPRESSED_MATRIX == 0
|
||||
Matrix<BaseFloat> temp;
|
||||
temp.Read(is, binary);
|
||||
this->CopyFromMat(temp);
|
||||
/*
|
||||
// The old reading code...
|
||||
#else
|
||||
// The old reading code...
|
||||
GlobalHeader h;
|
||||
is >> h.min_value >> h.range >> h.num_rows >> h.num_cols;
|
||||
if (is.fail())
|
||||
|
@ -346,7 +362,8 @@ void CompressedMatrix::Read(std::istream &is, bool binary) {
|
|||
assert(i >= 0 && i <= 255);
|
||||
*c = static_cast<unsigned char>(i);
|
||||
}
|
||||
} */
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (is.fail())
|
||||
KALDI_ERR << "Failed to read data.";
|
||||
|
|
|
@ -123,7 +123,6 @@ class CompressedMatrix {
|
|||
uint16 percentile_100;
|
||||
};
|
||||
|
||||
// The following function is called in CopyToMatrix.
|
||||
template<typename Real>
|
||||
static void CompressColumn(const GlobalHeader &global_header,
|
||||
const Real *data, MatrixIndexT stride,
|
||||
|
|
|
@ -1346,7 +1346,7 @@ template<typename Real> static void UnitTestEig() {
|
|||
Pinv.Invert();
|
||||
Matrix<Real> D(dimM, dimM);
|
||||
CreateEigenvalueMatrix(real_eigs, imag_eigs, &D);
|
||||
|
||||
|
||||
// check that M = P D P^{-1}.
|
||||
Matrix<Real> tmp(dimM, dimM);
|
||||
tmp.AddMatMat(1.0, P, kNoTrans, D, kNoTrans, 0.0); // tmp = P * D
|
||||
|
@ -3838,6 +3838,22 @@ template<typename Real> static void UnitTestCompressedMatrix() {
|
|||
Matrix<Real> diff(M2);
|
||||
diff.AddMat(-1.0, M);
|
||||
|
||||
{ // Check that when compressing a matrix that has already been compressed,
|
||||
// and uncompressing, we get the same answer.
|
||||
CompressedMatrix cmat2(M2);
|
||||
Matrix<Real> M3(cmat.NumRows(), cmat.NumCols());
|
||||
cmat2.CopyToMat(&M3);
|
||||
if (!M2.ApproxEqual(M3, 1.0e-05)) {
|
||||
KALDI_LOG << "cmat is: ";
|
||||
cmat.Write(std::cout, false);
|
||||
KALDI_LOG << "cmat2 is: ";
|
||||
cmat2.Write(std::cout, false);
|
||||
KALDI_ERR << "Matrices differ " << M2 << " vs. " << M3 << ", M2 range is "
|
||||
<< M2.Min() << " to " << M2.Max() << ", M3 range is "
|
||||
<< M3.Min() << " to " << M3.Max();
|
||||
}
|
||||
}
|
||||
|
||||
// test CopyRowToVec
|
||||
for (MatrixIndexT i = 0; i < num_rows; i++) {
|
||||
Vector<Real> V(num_cols);
|
||||
|
@ -3891,6 +3907,7 @@ template<typename Real> static void UnitTestCompressedMatrix() {
|
|||
InitKaldiInputStream(ins, &binary_in);
|
||||
cmat2.Read(ins, binary_in);
|
||||
}
|
||||
#if 1
|
||||
{ // check that compressed-matrix can be read as matrix.
|
||||
bool binary_in;
|
||||
std::ifstream ins("tmpf", std::ios_base::in | std::ios_base::binary);
|
||||
|
@ -3900,6 +3917,7 @@ template<typename Real> static void UnitTestCompressedMatrix() {
|
|||
Matrix<Real> mat2(cmat2);
|
||||
AssertEqual(mat1, mat2);
|
||||
}
|
||||
#endif
|
||||
|
||||
Matrix<Real> M3(cmat2.NumRows(), cmat2.NumCols());
|
||||
cmat2.CopyToMat(&M3);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-example.cc
|
||||
|
||||
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
|
||||
// Copyright 2012-2013 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -34,7 +34,8 @@ void NnetTrainingExample::Write(std::ostream &os, bool binary) const {
|
|||
WriteBasicType(os, binary, labels[i].second);
|
||||
}
|
||||
WriteToken(os, binary, "<InputFrames>");
|
||||
input_frames.Write(os, binary);
|
||||
CompressedMatrix compressed(input_frames);
|
||||
compressed.Write(os, binary); // can be read as regular Matrix.
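// Compressing here greatly shrinks the stored training examples; the
// compression is lossy (per-column quantization to bytes), a trade-off
// accepted in exchange for the reduction in disk usage.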
|
||||
WriteToken(os, binary, "<LeftContext>");
|
||||
WriteBasicType(os, binary, left_context);
|
||||
WriteToken(os, binary, "<SpkInfo>");
|
||||
|
|
|
@ -8,7 +8,6 @@ LDLIBS += $(CUDA_LDLIBS)
|
|||
|
||||
BINFILES = nnet-train-xent-hardlab-perutt \
|
||||
nnet-train-xent-hardlab-frmshuff \
|
||||
nnet-train-xent-hardlab-frmshuff-prior \
|
||||
nnet-train-mse-tgtmat-frmshuff \
|
||||
nnet-train-mmi-sequential \
|
||||
nnet-train-mpe-sequential \
|
||||
|
|
|
@ -35,7 +35,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
ParseOptions po(usage);
|
||||
|
||||
bool binary_write = false;
|
||||
bool binary_write = true;
|
||||
po.Register("binary", &binary_write, "Write output in binary mode");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
|
|
@ -55,7 +55,7 @@ int main(int argc, char *argv[]) {
|
|||
int32 use_gpu_id=-2;
|
||||
po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
|
||||
#else
|
||||
int32 use_gpu_id=-2;
|
||||
int32 use_gpu_id=0;
|
||||
po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
|
||||
#endif
|
||||
|
||||
|
|
|
@@ -1,393 +0,0 @@
// nnetbin/nnet-mpe.cc

// Copyright 2011-2013  Karel Vesely;  Arnab Ghoshal

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "tree/context-dep.h"
#include "hmm/transition-model.h"
#include "fstext/fstext-lib.h"
#include "decoder/faster-decoder.h"
#include "decoder/decodable-matrix.h"
#include "lat/kaldi-lattice.h"
#include "lat/lattice-functions.h"

#include "nnet/nnet-component.h"
#include "nnet/nnet-activation.h"
#include "nnet/nnet-nnet.h"
#include "util/timer.h"
#include "cudamatrix/cu-device.h"


namespace kaldi {
namespace nnet1 {

void LatticeAcousticRescore(const Matrix<BaseFloat> &log_like,
                            const TransitionModel &trans_model,
                            const std::vector<int32> state_times,
                            Lattice *lat) {
  kaldi::uint64 props = lat->Properties(fst::kFstProperties, false);
  if (!(props & fst::kTopSorted))
    KALDI_ERR << "Input lattice must be topologically sorted.";

  KALDI_ASSERT(!state_times.empty());
  std::vector<std::vector<int32> > time_to_state(log_like.NumRows());
  for (size_t i = 0; i < state_times.size(); i++) {
    KALDI_ASSERT(state_times[i] >= 0);
    if (state_times[i] < log_like.NumRows())  // end state may be past this..
      time_to_state[state_times[i]].push_back(i);
    else
      KALDI_ASSERT(state_times[i] == log_like.NumRows()
                   && "There appears to be lattice/feature mismatch.");
  }

  for (int32 t = 0; t < log_like.NumRows(); t++) {
    for (size_t i = 0; i < time_to_state[t].size(); i++) {
      int32 state = time_to_state[t][i];
      for (fst::MutableArcIterator<Lattice> aiter(lat, state); !aiter.Done();
           aiter.Next()) {
        LatticeArc arc = aiter.Value();
        int32 trans_id = arc.ilabel;
        if (trans_id != 0) {  // Non-epsilon input label on arc
          int32 pdf_id = trans_model.TransitionIdToPdf(trans_id);
          arc.weight.SetValue2(-log_like(t, pdf_id) + arc.weight.Value2());
          aiter.SetValue(arc);
        }
      }
    }
  }
}

}  // namespace nnet1
}  // namespace kaldi


int main(int argc, char *argv[]) {
  using namespace kaldi;
  using namespace kaldi::nnet1;
  typedef kaldi::int32 int32;
  try {
    const char *usage =
        "Perform one iteration of Neural Network MPE/sMBR training by stochastic "
        "gradient descent.\n"
        "Usage: nnet-mpe [options] <model-in> <transition-model-in> "
        "<feature-rspecifier> <den-lat-rspecifier> <ali-rspecifier> [<model-out>]\n"
        "e.g.: \n"
        " nnet-mpe nnet.init trans.mdl scp:train.scp scp:denlats.scp ark:train.ali "
        "nnet.iter1\n";

    ParseOptions po(usage);
    bool binary = false,
        crossvalidate = false;
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("cross-validate", &crossvalidate,
                "Perform cross-validation (don't backpropagate)");

    BaseFloat learn_rate = 0.00001,
        momentum = 0.0,
        l2_penalty = 0.0,
        l1_penalty = 0.0;

    po.Register("learn-rate", &learn_rate, "Learning rate");
    po.Register("momentum", &momentum, "Momentum");
    po.Register("l2-penalty", &l2_penalty, "L2 penalty (weight decay)");
    po.Register("l1-penalty", &l1_penalty, "L1 penalty (promote sparsity)");

    std::string feature_transform, class_frame_counts, silence_phones_str;
    po.Register("feature-transform", &feature_transform,
                "Feature transform Neural Network");
    po.Register("class-frame-counts", &class_frame_counts,
                "Class frame counts to compute the class priors");
    po.Register("silence-phones", &silence_phones_str, "Colon-separated list "
                "of integer id's of silence phones, e.g. 46:47");

    BaseFloat acoustic_scale = 1.0,
        lm_scale = 1.0,
        old_acoustic_scale = 0.0;
    po.Register("acoustic-scale", &acoustic_scale,
                "Scaling factor for acoustic likelihoods");
    po.Register("lm-scale", &lm_scale,
                "Scaling factor for \"graph costs\" (including LM costs)");
    po.Register("old-acoustic-scale", &old_acoustic_scale,
                "Add in the scores in the input lattices with this scale, rather "
                "than discarding them.");

    bool do_smbr = false;
    po.Register("do-smbr", &do_smbr, "Use state-level accuracies instead of "
                "phone accuracies.");

#if HAVE_CUDA == 1
    kaldi::int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID "
                "(-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#endif

    po.Read(argc, argv);

    if (po.NumArgs() != 6-(crossvalidate?1:0)) {
      po.PrintUsage();
      exit(1);
    }

    std::string model_filename = po.GetArg(1),
        transition_model_filename = po.GetArg(2),
        feature_rspecifier = po.GetArg(3),
        den_lat_rspecifier = po.GetArg(4),
        ref_ali_rspecifier = po.GetArg(5);

    std::string target_model_filename;
    if (!crossvalidate) {
      target_model_filename = po.GetArg(6);
    }

    std::vector<int32> silence_phones;
    if (!kaldi::SplitStringToIntegers(silence_phones_str, ":", false,
                                      &silence_phones))
      KALDI_ERR << "Invalid silence-phones string " << silence_phones_str;
    kaldi::SortAndUniq(&silence_phones);
    if (silence_phones.empty())
      KALDI_LOG << "No silence phones specified.";

    // Select the GPU
#if HAVE_CUDA == 1
    if (use_gpu_id > -2)
      CuDevice::Instantiate().SelectGpuId(use_gpu_id);
#endif

    Nnet nnet_transf;
    if (feature_transform != "") {
      nnet_transf.Read(feature_transform);
    }

    Nnet nnet;
    nnet.Read(model_filename);
    // using activations directly: remove softmax, if present
    if (nnet.Layer(nnet.LayerCount()-1)->GetType() == Component::kSoftmax) {
      KALDI_LOG << "Removing softmax from the nnet " << model_filename;
      nnet.RemoveLayer(nnet.LayerCount()-1);
    } else {
      KALDI_LOG << "The nnet was without softmax " << model_filename;
    }

    nnet.SetLearnRate(learn_rate, NULL);
    nnet.SetMomentum(momentum);
    nnet.SetL2Penalty(l2_penalty);
    nnet.SetL1Penalty(l1_penalty);

    TransitionModel trans_model;
    ReadKaldiObject(transition_model_filename, &trans_model);

    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
    RandomAccessLatticeReader den_lat_reader(den_lat_rspecifier);
    RandomAccessInt32VectorReader ref_ali_reader(ref_ali_rspecifier);

    CuMatrix<BaseFloat> feats, feats_transf, nnet_out, nnet_diff;
    Matrix<BaseFloat> nnet_out_h, nnet_diff_h;

    // Read the class-counts, compute priors
    CuVector<BaseFloat> log_priors;
    if (class_frame_counts != "") {
      Vector<BaseFloat> tmp_priors;
      Input in;
      in.OpenTextMode(class_frame_counts);
      tmp_priors.Read(in.Stream(), false);
      in.Close();

      // create inv. priors, or log inv priors
      BaseFloat sum = tmp_priors.Sum();
      tmp_priors.Scale(1.0 / sum);
      tmp_priors.ApplyLog();

      // push priors to GPU
      log_priors.Resize(tmp_priors.Dim());
      log_priors.CopyFromVec(tmp_priors);
    }


    Timer time;
    double time_now = 0;
    KALDI_LOG << (crossvalidate?"CROSSVALIDATE":"TRAINING") << " STARTED";

    int32 num_done = 0, num_no_ref_ali = 0, num_no_den_lat = 0,
        num_other_error = 0;

    kaldi::int64 total_frames = 0;
    double total_frame_acc = 0.0, utt_frame_acc;

    // do per-utterance processing
    for (; !feature_reader.Done(); feature_reader.Next()) {
      std::string utt = feature_reader.Key();
      if (!den_lat_reader.HasKey(utt)) {
        KALDI_WARN << "Utterance " << utt << ": found no lattice.";
        num_no_den_lat++;
        continue;
      }
      if (!ref_ali_reader.HasKey(utt)) {
        KALDI_WARN << "Utterance " << utt << ": found no reference alignment.";
        num_no_ref_ali++;
        continue;
      }

      // 1) get the features, numerator alignment
      const Matrix<BaseFloat> &mat = feature_reader.Value();
      const std::vector<int32> &ref_ali = ref_ali_reader.Value(utt);
      // check for temporal length of numerator alignments
      if (static_cast<MatrixIndexT>(ref_ali.size()) != mat.NumRows()) {
        KALDI_WARN << "Numerator alignment has wrong length "
                   << ref_ali.size() << " vs. " << mat.NumRows();
        num_other_error++;
        continue;
      }

      // 2) get the denominator lattice, preprocess
      Lattice den_lat = den_lat_reader.Value(utt);
      if (old_acoustic_scale != 1.0) {
        fst::ScaleLattice(fst::AcousticLatticeScale(old_acoustic_scale),
                          &den_lat);
      }
      // sort it topologically if not already so
      kaldi::uint64 props = den_lat.Properties(fst::kFstProperties, false);
      if (!(props & fst::kTopSorted)) {
        if (fst::TopSort(&den_lat) == false)
          KALDI_ERR << "Cycles detected in lattice.";
      }
      // get the lattice length and times of states
      vector<int32> state_times;
      int32 max_time = kaldi::LatticeStateTimes(den_lat, &state_times);
      // check for temporal length of denominator lattices
      if (max_time != mat.NumRows()) {
        KALDI_WARN << "Denominator lattice has wrong length " << max_time
                   << " vs. " << mat.NumRows();
        num_other_error++;
        continue;
      }

      // 3) propagate the feature to get the log-posteriors (nnet w/o softmax)
      // push features to GPU
      feats = mat;
      // possibly apply transform
      nnet_transf.Feedforward(feats, &feats_transf);
      // propagate through the nnet (assuming w/o softmax)
      nnet.Propagate(feats_transf, &nnet_out);
      // subtract the log_priors
      if (log_priors.Dim() > 0) {
        nnet_out.AddVecToRows(-1.0, log_priors);
      }
      // transfer it back to the host
      int32 num_frames = nnet_out.NumRows(),
          num_pdfs = nnet_out.NumCols();
      nnet_out_h.Resize(num_frames, num_pdfs, kUndefined);
      nnet_out.CopyToMat(&nnet_out_h);

      // 4) rescore the lattice
      LatticeAcousticRescore(nnet_out_h, trans_model, state_times, &den_lat);
      if (acoustic_scale != 1.0 || lm_scale != 1.0)
        fst::ScaleLattice(fst::LatticeScale(lm_scale, acoustic_scale), &den_lat);

      // 5) get the posteriors
      vector< std::map<int32, char> > arc_accs;
      arc_accs.resize(ref_ali.size());
      kaldi::Posterior post;

      if (do_smbr) {  // use state-level accuracies, i.e. sMBR estimation
        for (size_t i = 0; i < ref_ali.size(); i++) {
          int32 pdf = trans_model.TransitionIdToPdf(ref_ali[i]);
          arc_accs[i][pdf] = 1;
        }
        utt_frame_acc = LatticeForwardBackwardSmbr(den_lat, trans_model,
                                                   arc_accs, silence_phones,
                                                   &post);
      } else {  // use phone-level accuracies, i.e. regular MPE
        for (size_t i = 0; i < ref_ali.size(); i++) {
          int32 phone = trans_model.TransitionIdToPhone(ref_ali[i]);
          arc_accs[i][phone] = 1;
        }
        utt_frame_acc = kaldi::LatticeForwardBackwardMpe(den_lat, trans_model,
                                                         arc_accs, &post,
                                                         silence_phones);
      }

      // 6) convert the Posterior to a matrix
      nnet_diff_h.Resize(num_frames, num_pdfs, kSetZero);
      for (int32 t = 0; t < post.size(); t++) {
        for (int32 arc = 0; arc < post[t].size(); arc++) {
          int32 pdf = trans_model.TransitionIdToPdf(post[t][arc].first);
          nnet_diff_h(t, pdf) -= post[t][arc].second;
        }
      }

      KALDI_VLOG(1) << "Processed lattice for utterance " << num_done + 1
                    << " (" << utt << "): found " << den_lat.NumStates()
                    << " states and " << fst::NumArcs(den_lat) << " arcs.";

      KALDI_VLOG(1) << "Utterance " << utt << ": Average frame accuracy = "
                    << (utt_frame_acc/num_frames) << " over " << num_frames
                    << " frames.";

      // 7) backpropagate through the nnet
      if (!crossvalidate) {
        nnet_diff = nnet_diff_h;
        nnet.Backpropagate(nnet_diff, NULL);
      }

      // increase time counter
      total_frame_acc += utt_frame_acc;
      total_frames += num_frames;
      num_done++;

      if (num_done % 100 == 0) {
        time_now = time.Elapsed();
        KALDI_VLOG(1) << "After " << num_done << " utterances: time elapsed = "
                      << time_now/60 << " min; processed " << total_frames/time_now
                      << " frames per second.";
      }
    }

    if (!crossvalidate) {
      // add the softmax layer back before writing
      KALDI_LOG << "Appending the softmax " << target_model_filename;
      nnet.AppendLayer(new Softmax(nnet.OutputDim(),nnet.OutputDim(),&nnet));
      // store the nnet
      nnet.Write(target_model_filename, binary);
    }

    time_now = time.Elapsed();
    KALDI_LOG << (crossvalidate?"CROSSVALIDATE":"TRAINING") << " FINISHED; "
              << "Time taken = " << time_now/60 << " min; processed "
              << (total_frames/time_now) << " frames per second.";

    KALDI_LOG << "Done " << num_done << " files, "
              << num_no_ref_ali << " with no reference alignments, "
              << num_no_den_lat << " with no lattices, "
              << num_other_error << " with other errors.";

    KALDI_LOG << "Overall average frame-accuracy is "
              << (total_frame_acc/total_frames) << " over " << total_frames
              << " frames.";

#if HAVE_CUDA == 1
    CuDevice::Instantiate().PrintProfile();
#endif

    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what();
    return -1;
  }
}
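One detail of the deleted nnet-mpe.cc worth keeping in view: step 6 turns the sparse MPE/sMBR posteriors into the dense error matrix that is backpropagated. A standalone sketch of that conversion, with plain STL types standing in for Kaldi's Posterior and TransitionModel (both stand-ins are assumptions for illustration):

#include <utility>
#include <vector>

// Per frame, a Posterior holds (transition-id, weight) pairs; the gradient
// w.r.t. the pre-softmax activations accumulates the negated weights into
// the corresponding pdf columns, exactly as in step 6 of the deleted tool.
typedef std::vector<std::vector<std::pair<int, float> > > ToyPosterior;

void PosteriorToGradient(const ToyPosterior &post,
                         int num_pdfs,
                         int (*trans_id_to_pdf)(int),  // toy TransitionIdToPdf
                         std::vector<std::vector<float> > *diff) {
  diff->assign(post.size(), std::vector<float>(num_pdfs, 0.0f));
  for (size_t t = 0; t < post.size(); t++)
    for (size_t i = 0; i < post[t].size(); i++)
      (*diff)[t][trans_id_to_pdf(post[t][i].first)] -= post[t][i].second;
}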
@@ -102,7 +102,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts; trn_opts.learn_rate=0.00001;
    trn_opts.Register(&po);

-    bool binary = false;
+    bool binary = true;
    po.Register("binary", &binary, "Write output in binary mode");

    std::string feature_transform;
@@ -134,6 +134,9 @@ int main(int argc, char *argv[]) {
    kaldi::int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID "
                "(-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -100,7 +100,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts; trn_opts.learn_rate=0.00001;
    trn_opts.Register(&po);

-    bool binary = false;
+    bool binary = true;
    po.Register("binary", &binary, "Write output in binary mode");

    std::string feature_transform;
@@ -133,6 +133,9 @@ int main(int argc, char *argv[]) {
    kaldi::int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID "
                "(-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -46,7 +46,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts;
    trn_opts.Register(&po);

-    bool binary = false,
+    bool binary = true,
        crossvalidate = false,
        randomize = true;
    po.Register("binary", &binary, "Write output in binary mode");
@@ -64,6 +64,9 @@ int main(int argc, char *argv[]) {
#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -1,223 +0,0 @@
// nnetbin/nnet-train-xent-hardlab-frmshuff-prior.cc

// Copyright 2011-2013  Karel Vesely, Brno University of Technology

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet/nnet-trnopts.h"
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-loss-prior.h"
#include "nnet/nnet-cache.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "util/timer.h"
#include "cudamatrix/cu-device.h"


int main(int argc, char *argv[]) {
  using namespace kaldi;
  using namespace kaldi::nnet1;
  try {
    const char *usage =
        "Perform one iteration of Neural Network training by stochastic gradient descent.\n"
        "Usage: nnet-train-xent-hardlab-frmshuff-prior [options] <model-in> <feature-rspecifier> <alignments-rspecifier> [<model-out>]\n"
        "e.g.: \n"
        " nnet-train-xent-hardlab-frmshuff-prior nnet.init scp:train.scp ark:train.ali nnet.iter1\n";

    ParseOptions po(usage);

    NnetTrainOptions trn_opts;
    trn_opts.Register(&po);

    bool binary = false,
        crossvalidate = false,
        randomize = true;
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("cross-validate", &crossvalidate, "Perform cross-validation (don't backpropagate)");
    po.Register("randomize", &randomize, "Perform the frame-level shuffling within the Cache::");

    std::string feature_transform;
    po.Register("feature-transform", &feature_transform, "Feature transform in Nnet format");

    int32 bunchsize=512, cachesize=32768, seed=777;
    po.Register("bunchsize", &bunchsize, "Size of weight update block");
    po.Register("cachesize", &cachesize, "Size of cache for frame level shuffling (max 8388479)");

    std::string prior_rxfile;
    po.Register("prior", &prior_rxfile, "Priors of the training data to scale down gradients of represented PDFs [REQUIRED]");
    BaseFloat prior_softener = 1000;  // i.e. use uniform prior (disable reweighting)
    BaseFloat prior_silence_amount = 1.0;  // i.e. disable silence downscaling (use all the silence data available)
    po.Register("prior-softener", &prior_softener, "Prior softener, scales uniform part added to prior before doing the inverse");
    po.Register("prior-silence-amount", &prior_silence_amount, "Define how much of ``effective silence data'' should be used for training (1.0 will bypass silence scaling)");
    int32 prior_silence_numpdf = 5;
    po.Register("prior-silence-numpdf", &prior_silence_numpdf, "Number of initial PDFs which model the silence");

#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
#endif

    po.Read(argc, argv);

    if (po.NumArgs() != 4-(crossvalidate?1:0)) {
      po.PrintUsage();
      exit(1);
    }

    std::string model_filename = po.GetArg(1),
        feature_rspecifier = po.GetArg(2),
        alignments_rspecifier = po.GetArg(3);

    std::string target_model_filename;
    if (!crossvalidate) {
      target_model_filename = po.GetArg(4);
    }

    // set the seed to the pre-defined value
    srand(seed);

    using namespace kaldi;
    using namespace kaldi::nnet1;
    typedef kaldi::int32 int32;

    // Select the GPU
#if HAVE_CUDA==1
    CuDevice::Instantiate().SelectGpuId(use_gpu_id);
#endif

    Nnet nnet_transf;
    if (feature_transform != "") {
      nnet_transf.Read(feature_transform);
    }

    Nnet nnet;
    nnet.Read(model_filename);
    nnet.SetTrainOptions(trn_opts);

    kaldi::int64 total_frames = 0;

    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
    RandomAccessInt32VectorReader alignments_reader(alignments_rspecifier);

    Cache cache;
    cachesize = (cachesize/bunchsize)*bunchsize;  // ensure divisibility
    cache.Init(cachesize, bunchsize);

    XentPrior xent;
    if (prior_rxfile != "") {
      xent.ReadPriors(prior_rxfile, prior_softener, prior_silence_amount, prior_silence_numpdf);
    } else {
      KALDI_ERR << "Missing prior file!";
    }

    CuMatrix<BaseFloat> feats, feats_transf, nnet_in, nnet_out, obj_diff;
    std::vector<int32> targets;

    Timer time;
    double time_now = 0;
    double time_next = 0;
    KALDI_LOG << (crossvalidate?"CROSSVALIDATE":"TRAINING") << " STARTED";

    int32 num_done = 0, num_no_alignment = 0, num_other_error = 0, num_cache = 0;
    while (1) {
      // fill the cache
      while (!cache.Full() && !feature_reader.Done()) {
        std::string utt = feature_reader.Key();
        if (!alignments_reader.HasKey(utt)) {
          num_no_alignment++;
        } else {
          // get feature alignment pair
          const Matrix<BaseFloat> &mat = feature_reader.Value();
          const std::vector<int32> &alignment = alignments_reader.Value(utt);
          // check the length of the data
          if ((int32)alignment.size() != mat.NumRows()) {
            KALDI_WARN << "Alignment has wrong length, ali " << (alignment.size()) << " vs. feats " << (mat.NumRows()) << ", " << utt;
            num_other_error++;
          } else {  // length OK
            // push features to GPU
            feats.Resize(mat.NumRows(), mat.NumCols(), kUndefined);
            feats.CopyFromMat(mat);
            // possibly apply transform
            nnet_transf.Feedforward(feats, &feats_transf);
            // add to cache
            cache.AddData(feats_transf, alignment);
            num_done++;
          }
        }
        Timer t_features;
        feature_reader.Next();
        time_next += t_features.Elapsed();

        // report the speed
        if (num_done % 1000 == 0) {
          time_now = time.Elapsed();
          KALDI_VLOG(1) << "After " << num_done << " utterances: time elapsed = "
                        << time_now/60 << " min; processed " << total_frames/time_now
                        << " frames per second.";
        }
      }
      // randomize
      if (!crossvalidate && randomize) {
        cache.Randomize();
      }
      // report
      KALDI_VLOG(1) << "Cache #" << ++num_cache << " "
                    << (cache.Randomized()?"[RND]":"[NO-RND]")
                    << " segments: " << num_done
                    << " frames: " << static_cast<double>(total_frames)/360000 << "h";
      // train with the cache
      while (!cache.Empty()) {
        // get block of feature/target pairs
        cache.GetBunch(&nnet_in, &targets);
        // train
        nnet.Propagate(nnet_in, &nnet_out);
        xent.EvalVec(nnet_out, targets, &obj_diff);
        if (!crossvalidate) {
          nnet.Backpropagate(obj_diff, NULL);
        }
        total_frames += nnet_in.NumRows();
      }

      // stop training when no more data
      if (feature_reader.Done()) break;
    }

    if (!crossvalidate) {
      nnet.Write(target_model_filename, binary);
    }

    KALDI_LOG << (crossvalidate?"CROSSVALIDATE":"TRAINING") << " FINISHED "
              << time.Elapsed()/60 << " min, fps " << total_frames/time.Elapsed()
              << ", feature wait " << time_next << "s";

    KALDI_LOG << "Done " << num_done << " files, " << num_no_alignment
              << " with no alignments, " << num_other_error
              << " with other errors.";

    KALDI_LOG << xent.Report();

#if HAVE_CUDA==1
    CuDevice::Instantiate().PrintProfile();
#endif

    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what();
    return -1;
  }
}
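The deleted trainer's outer loop (fill the cache, shuffle at frame level, drain in bunches) is the same scheme the surviving frmshuff tools use. A schematic sketch with toy types in place of the nnet1 Cache machinery (Frame, TrainEpoch and UpdateOnBunch are illustrative names, not Kaldi API):

#include <algorithm>
#include <random>
#include <vector>

struct Frame { std::vector<float> feat; int target; };

void TrainEpoch(std::vector<Frame> &stream, size_t cachesize, size_t bunchsize) {
  cachesize = (cachesize / bunchsize) * bunchsize;  // ensure divisibility
  std::mt19937 rng(777);                            // fixed seed, as in the tool
  size_t pos = 0;
  while (pos < stream.size()) {
    // 1) fill the cache
    size_t end = std::min(pos + cachesize, stream.size());
    std::vector<Frame> cache(stream.begin() + pos, stream.begin() + end);
    pos = end;
    // 2) frame-level shuffling
    std::shuffle(cache.begin(), cache.end(), rng);
    // 3) drain in bunches: propagate / evaluate / backpropagate each block
    for (size_t b = 0; b + bunchsize <= cache.size(); b += bunchsize) {
      // UpdateOnBunch(&cache[b], bunchsize);  // placeholder for the update
    }
  }
}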
@@ -42,7 +42,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts;
    trn_opts.Register(&po);

-    bool binary = false,
+    bool binary = true,
        crossvalidate = false,
        randomize = true;
    po.Register("binary", &binary, "Write output in binary mode");
@@ -63,6 +63,9 @@ int main(int argc, char *argv[]) {
#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -41,7 +41,7 @@ int main(int argc, char *argv[]) {
    NnetTrainOptions trn_opts;
    trn_opts.Register(&po);

-    bool binary = false,
+    bool binary = true,
        crossvalidate = false;
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("cross-validate", &crossvalidate, "Perform cross-validation (don't backpropagate)");
@@ -52,6 +52,9 @@ int main(int argc, char *argv[]) {
#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -1,76 +0,0 @@
// nnetbin/nnet-trim-last-n-layers.cc

// Copyright 2012  Karel Vesely

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "nnet/nnet-nnet.h"

int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    using namespace kaldi::nnet1;
    typedef kaldi::int32 int32;

    const char *usage =
        "Trim the ending part of the MLP\n"
        "Usage: nnet-trim-last-n-layers [options] <model-in> <model-out>\n"
        "e.g.:\n"
        " nnet-trim-last-n-layers --binary=false nnet.mdl nnet_txt.mdl\n";


    bool binary_write = false;

    ParseOptions po(usage);
    po.Register("binary", &binary_write, "Write output in binary mode");

    int32 trim_num = 0;
    po.Register("n", &trim_num, "Number of transforms to be trimmed (including sigmoid/softmax)");

    po.Read(argc, argv);

    if (po.NumArgs() != 2) {
      po.PrintUsage();
      exit(1);
    }

    std::string model_in_filename = po.GetArg(1),
        model_out_filename = po.GetArg(2);

    Nnet nnet;
    {
      bool binary_read;
      Input ki(model_in_filename, &binary_read);
      nnet.Read(ki.Stream(), binary_read);
    }

    {
      Output ko(model_out_filename, binary_write);
      int32 write_num_layers = nnet.LayerCount() - trim_num;
      nnet.WriteFrontLayers(ko.Stream(), binary_write, write_num_layers);
    }

    KALDI_LOG << "Written model to " << model_out_filename;
    return 0;
  } catch(const std::exception& e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}
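What the removed tool did is small enough to restate: keep the first LayerCount() - n components and drop the trailing ones (typically the last affine plus sigmoid/softmax). A toy sketch over a generic layer list (illustrative, not the Nnet API):

#include <vector>

// Return the leading layers, trimming the last n; trimming more layers
// than exist keeps nothing rather than underflowing.
template <typename Layer>
std::vector<Layer> TrimLastNLayers(const std::vector<Layer> &layers, int n) {
  int keep = static_cast<int>(layers.size()) - n;
  if (keep < 0) keep = 0;
  return std::vector<Layer>(layers.begin(), layers.begin() + keep);
}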
@@ -35,7 +35,7 @@ int main(int argc, char *argv[]) {
        " rbm-convert-to-nnet --binary=false rbm.mdl nnet.mdl\n";


-    bool binary_write = false;
+    bool binary_write = true;

    ParseOptions po(usage);
    po.Register("binary", &binary_write, "Write output in binary mode");
@@ -68,6 +68,9 @@ int main(int argc, char *argv[]) {
#if HAVE_CUDA==1
    int32 use_gpu_id=-2;
    po.Register("use-gpu-id", &use_gpu_id, "Manually select GPU by its ID (-2 automatic selection, -1 disable GPU, 0..N select GPU)");
+#else
+    int32 use_gpu_id=0;
+    po.Register("use-gpu-id", &use_gpu_id, "Unused, kaldi is compiled w/o CUDA");
#endif

    po.Read(argc, argv);
@@ -5,7 +5,12 @@ include ../kaldi.mk

# The PA_RingBuffer interface is internal and is not exported in the .so library
# so we have to link against the static one
-EXTRA_LDLIBS = ../../tools/portaudio/install/lib/libportaudio.a
+ifneq "$(wildcard ../../tools/portaudio/install/lib/libportaudio.a)" ""
+EXTRA_LDLIBS = ../../tools/portaudio/install/lib/libportaudio.a
+else
+EXTRA_LDLIBS = ../../tools/portaudio/install/lib64/libportaudio.a
+endif

UNAME=$(shell uname)
ifeq ($(UNAME), Linux)
@@ -178,7 +178,11 @@ void SplitStatsByMap(const BuildTreeStatsType &stats, const EventMap &e, std::ve
    const EventType &evec = iter->first;
    EventAnswerType ans;
    if (!e.Map(evec, &ans))  // this is an error--could not map it.
-      KALDI_ERR << "SplitStatsByMap: could not map event vector " << EventTypeToString(evec);
+      KALDI_ERR << "SplitStatsByMap: could not map event vector " << EventTypeToString(evec)
+                << "; if this error is seen during tree-building, check that "
+                << "--context-width and --central-position match stats, "
+                << "and that phones that are context-independent (CI) during "
+                << "stats accumulation do not share roots with non-CI phones.";
    size = std::max(size, (size_t)(ans+1));
  }
  stats_out->resize(size);