зеркало из https://github.com/mozilla/kaldi.git
(BABEL) Adding missing configurations for using pitch even for BP non-tonal languages
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3275 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
f7a4111875
Коммит
7634751e59
|
@ -0,0 +1,78 @@
|
|||
# include common settings for limitedLP systems.
|
||||
. conf/common.limitedLP || exit 1;
|
||||
|
||||
# Speech corpora file locations (training data)
|
||||
train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training
|
||||
train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.official.list
|
||||
train_nj=16
|
||||
|
||||
# RADICAL DEV data files (dev2h_* settings)
|
||||
dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
|
||||
dev2h_data_list=/export/babel/data/splits/Turkish_Babel105/dev2hr.list
|
||||
dev2h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
|
||||
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.stm
|
||||
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.ecf.xml
|
||||
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.mitllfa3.rttm
|
||||
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.kwlist.xml
|
||||
dev2h_subset_ecf=true
|
||||
dev2h_nj=18
|
||||
|
||||
# Official DEV data files (dev10h_* settings)
|
||||
dev10h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
|
||||
dev10h_data_list=/export/babel/data/splits/Turkish_Babel105/dev.list
|
||||
dev10h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
|
||||
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.stm
|
||||
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.ecf.xml
|
||||
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.mitllfa3.rttm
|
||||
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.kwlist.xml
|
||||
dev10h_nj=32
|
||||
|
||||
|
||||
#Official EVAL period evaluation data files
|
||||
eval_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval
|
||||
eval_data_list=/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
|
||||
eval_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
|
||||
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-eval.ecf.xml
|
||||
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-eval.kwlist2.xml
|
||||
eval_nj=64
|
||||
|
||||
# Official (POST-)EVAL evaluation data portion (evalpart1_* paths are left empty in this config; only the job count is set)
|
||||
evalpart1_data_dir=
|
||||
evalpart1_data_list=
|
||||
evalpart1_data_cmudb=
|
||||
evalpart1_stm_file=
|
||||
evalpart1_ecf_file=
|
||||
evalpart1_rttm_file=
|
||||
evalpart1_kwlist_file=
|
||||
evalpart1_nj=21
|
||||
|
||||
# Acoustic model parameters
|
||||
numLeavesTri1=1000
|
||||
numGaussTri1=10000
|
||||
numLeavesTri2=2500
|
||||
numGaussTri2=36000
|
||||
numLeavesTri3=2500
|
||||
numGaussTri3=36000
|
||||
numLeavesMLLT=2500
|
||||
numGaussMLLT=36000
|
||||
numLeavesSAT=2500
|
||||
numGaussSAT=36000
|
||||
numGaussUBM=600
|
||||
numLeavesSGMM=5000
|
||||
numGaussSGMM=18000
|
||||
|
||||
# Lexicon and Language Model parameters
|
||||
oovSymbol="<unk>"
|
||||
lexiconFlags="--oov <unk>"
|
||||
# Turn on pitch features for this language (the commit adds pitch configs even for non-tonal languages).
# NOTE(review): the flag is consumed by scripts outside this file — confirm where it is read.
use_pitch=true
|
||||
|
||||
# Scoring protocols (dummy GLM file to appease the scoring script)
|
||||
#glmFile=./conf/glm
|
||||
lexicon_file=/export/babel/data/105-turkish/release-babel105b-v0.4-rc1/conversational/reference_materials/lexicon.sub-train.txt
|
||||
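# ICU transliteration rules used for Turkish-aware lowercasing (İ -> i, I -> ı, then Any-Lower).
# The rules can be tried out interactively at http://demo.icu-project.org/icu-bin/translit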
|
||||
icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();' )
|
||||
#icu_opt=(--use-icu true --icu-transform "'\\\\\\\\İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();'" )
|
||||
# Keyword search settings
|
||||
duptime=0.5
|
||||
case_insensitive=true
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
# include common settings for limitedLP systems.
|
||||
. conf/common.limitedLP || exit 1;
|
||||
|
||||
# Speech corpora file locations (training data)
|
||||
train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/
|
||||
train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.official.list
|
||||
train_nj=16
|
||||
|
||||
# RADICAL DEV data files (dev2h_* settings)
|
||||
dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
|
||||
dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list
|
||||
dev2h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
|
||||
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
|
||||
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
|
||||
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
|
||||
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
|
||||
dev2h_subset_ecf=true
|
||||
dev2h_nj=23
|
||||
|
||||
# Official DEV data files (dev10h_* settings)
|
||||
dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
|
||||
dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list
|
||||
dev10h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
|
||||
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
|
||||
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
|
||||
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
|
||||
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
|
||||
dev10h_nj=32
|
||||
|
||||
|
||||
#Official EVAL period evaluation data files
|
||||
eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval
|
||||
eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
|
||||
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.ecf.xml
|
||||
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.kwlist2.xml
|
||||
eval_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
|
||||
eval_nj=64
|
||||
|
||||
# Official (POST-)EVAL evaluation data portion (evalpart1_* paths are left empty in this config; only the job count is set)
|
||||
evalpart1_data_dir=
|
||||
evalpart1_data_list=
|
||||
evalpart1_data_cmudb=
|
||||
evalpart1_stm_file=
|
||||
evalpart1_ecf_file=
|
||||
evalpart1_rttm_file=
|
||||
evalpart1_kwlist_file=
|
||||
evalpart1_nj=21
|
||||
|
||||
# Acoustic model parameters
|
||||
numLeavesTri1=1000
|
||||
numGaussTri1=10000
|
||||
numLeavesTri2=2500
|
||||
numGaussTri2=36000
|
||||
numLeavesTri3=2500
|
||||
numGaussTri3=36000
|
||||
numLeavesMLLT=2500
|
||||
numGaussMLLT=36000
|
||||
numLeavesSAT=2500
|
||||
numGaussSAT=36000
|
||||
numGaussUBM=750
|
||||
numLeavesSGMM=5000
|
||||
numGaussSGMM=18000
|
||||
|
||||
# Lexicon and Language Model parameters
|
||||
oovSymbol="<unk>"
|
||||
lexiconFlags="--oov <unk>"
|
||||
|
||||
# Feature switches: use_ffv is off and use_pitch is on for this language
# (the commit adds pitch configs even for non-tonal languages).
# NOTE(review): both flags are consumed by scripts outside this file — confirm where they are read.
use_ffv=false
|
||||
use_pitch=true
|
||||
# Scoring protocols (dummy GLM file to appease the scoring script)
|
||||
#glmFile=./conf/glm
|
||||
lexicon_file=/export/babel/data/106-tagalog/release-babel106b-v0.2g-sub-train/conversational/reference_materials/lexicon.sub-train.txt
|
||||
|
||||
# Keyword search settings
|
||||
duptime=0.5
|
||||
case_insensitive=true
|
||||
|
Загрузка…
Ссылка в новой задаче