(BABEL) Adding missing configurations for using pitch even for BP non-tonal languages

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3275 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Jan Trmal 2013-12-05 18:55:52 +00:00
Родитель f7a4111875
Коммит 7634751e59
2 изменённых файлов: 155 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,78 @@
# Include common settings for limitedLP systems.
# The shared file is sourced into the current shell first; if sourcing fails,
# `exit 1` is executed. Assignments below come after it, so they take
# precedence over any same-named variables the shared file may set.
. conf/common.limitedLP || exit 1;
# Speech corpora file locations: training audio directory and the list file
# selecting the LimitedLP training subset (105-turkish / Turkish_Babel105).
train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training
train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.official.list
# NOTE(review): presumably the number of parallel jobs used for the training
# set -- confirm against the scripts that consume this config.
train_nj=16
# RADICAL DEV data files ("dev2h" set).
# Uses the same audio directory, cmudb, STM, ECF, RTTM and keyword-list files
# as the dev10h set in this file; only the list file (dev2hr.list), the
# subset flag and the job count differ.
dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
dev2h_data_list=/export/babel/data/splits/Turkish_Babel105/dev2hr.list
dev2h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.kwlist.xml
# NOTE(review): presumably tells the consumer to restrict the full-dev ECF to
# the files in dev2h_data_list -- confirm; only this set defines the flag.
dev2h_subset_ecf=true
# NOTE(review): presumably the parallel job count for this set -- confirm.
dev2h_nj=18
# Official DEV data files ("dev10h" set).
# Points at the same dev audio directory and scoring files as dev2h, but uses
# the full dev.list and defines no *_subset_ecf flag.
dev10h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
dev10h_data_list=/export/babel/data/splits/Turkish_Babel105/dev.list
dev10h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.kwlist.xml
# NOTE(review): presumably the parallel job count for this set -- confirm.
dev10h_nj=32
# Official EVAL period evaluation data files.
# Only the audio directory, list, cmudb, ECF and keyword list are configured;
# no STM or RTTM file is set for this set in this file.
eval_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval
eval_data_list=/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
eval_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-eval.kwlist2.xml
# NOTE(review): presumably the parallel job count for this set -- confirm.
eval_nj=64
# Official (POST-)EVAL evaluation data portion ("evalpart1").
# Every path below is deliberately assigned the empty string, so the variables
# exist but carry no location; only the job count has a value.
# NOTE(review): how consumers treat the empty values is not visible here --
# confirm before running anything on evalpart1 with this config.
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
# Acoustic model parameters.
# One numLeaves*/numGauss* pair per training stage (Tri1, Tri2, Tri3, MLLT,
# SAT, SGMM) plus a Gaussian count for the UBM.
# NOTE(review): presumably numLeaves* is the number of decision-tree leaves
# and numGauss* the total number of Gaussians requested for that stage --
# confirm against the training scripts that read these variables.
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=2500
numGaussTri2=36000
numLeavesTri3=2500
numGaussTri3=36000
numLeavesMLLT=2500
numGaussMLLT=36000
numLeavesSAT=2500
numGaussSAT=36000
numGaussUBM=600
numLeavesSGMM=5000
numGaussSGMM=18000
# Lexicon and Language Model parameters
# The OOV symbol is given twice: once on its own and once embedded in the
# flag string; keep the two in sync when changing it.
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
# Pitch features are switched on in this configuration.
use_pitch=true
# Scoring protocols (dummy GLM file to appease the scoring script)
# glmFile is left commented out here, so this file does not set it.
#glmFile=./conf/glm
lexicon_file=/export/babel/data/105-turkish/release-babel105b-v0.4-rc1/conversational/reference_materials/lexicon.sub-train.txt
# ICU transliteration demo, useful for trying out transform rules:
#http://demo.icu-project.org/icu-bin/translit
# icu_opt is a bash array (parenthesised assignment) of four words:
#   --use-icu, true, --icu-transform, and one single-quoted rule string.
# Because the rule string is single-quoted, the shell passes its backslashes
# and "$1" through verbatim to whatever consumes the array.
# NOTE(review): the rules appear to map Turkish dotted/dotless capital I to
# the matching lowercase letters before the generic ::Any-Lower() step --
# confirm against the ICU transform documentation. The backslash count is
# presumably tuned to the number of re-evaluation layers in the consumer;
# do not change it without testing end to end.
icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();' )
# Disabled alternative with an extra layer of quoting, kept for reference:
#icu_opt=(--use-icu true --icu-transform "'\\\\\\\\İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();'" )
# Keyword search settings.
# NOTE(review): duptime is presumably a time window in seconds used when
# merging duplicate keyword hits -- confirm against the search scripts.
duptime=0.5
# Case-insensitive handling is enabled for keyword search in this config.
case_insensitive=true

Просмотреть файл

@ -0,0 +1,77 @@
# Include common settings for limitedLP systems.
# The shared file is sourced into the current shell first; if sourcing fails,
# `exit 1` is executed. Assignments below come after it, so they take
# precedence over any same-named variables the shared file may set.
. conf/common.limitedLP || exit 1;
# Speech corpora file locations: training audio directory and the list file
# selecting the LimitedLP training subset (106-tagalog / Tagalog_Babel106).
# Note that the directory path here carries a trailing slash.
train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.official.list
# NOTE(review): presumably the number of parallel jobs used for the training
# set -- confirm against the scripts that consume this config.
train_nj=16
# RADICAL DEV data files ("dev2h" set).
# Uses the same audio directory, cmudb, STM, ECF, RTTM and keyword-list files
# as the dev10h set in this file; only the list file (dev2hr.list), the
# subset flag and the job count differ.
dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list
dev2h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
# NOTE(review): presumably tells the consumer to restrict the full-dev ECF to
# the files in dev2h_data_list -- confirm; only this set defines the flag.
dev2h_subset_ecf=true
# NOTE(review): presumably the parallel job count for this set -- confirm.
dev2h_nj=23
# Official DEV data files ("dev10h" set).
# Points at the same dev audio directory and scoring files as dev2h, but uses
# the full dev.list and defines no *_subset_ecf flag.
dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list
dev10h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
# NOTE(review): presumably the parallel job count for this set -- confirm.
dev10h_nj=32
# Official EVAL period evaluation data files.
# Only the audio directory, list, ECF, keyword list and cmudb are configured;
# no STM or RTTM file is set for this set in this file.
eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval
eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.kwlist2.xml
eval_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
# NOTE(review): presumably the parallel job count for this set -- confirm.
eval_nj=64
# Official (POST-)EVAL evaluation data portion ("evalpart1").
# Every path below is deliberately assigned the empty string, so the variables
# exist but carry no location; only the job count has a value.
# NOTE(review): how consumers treat the empty values is not visible here --
# confirm before running anything on evalpart1 with this config.
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
# Acoustic model parameters.
# One numLeaves*/numGauss* pair per training stage (Tri1, Tri2, Tri3, MLLT,
# SAT, SGMM) plus a Gaussian count for the UBM.
# NOTE(review): presumably numLeaves* is the number of decision-tree leaves
# and numGauss* the total number of Gaussians requested for that stage --
# confirm against the training scripts that read these variables.
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=2500
numGaussTri2=36000
numLeavesTri3=2500
numGaussTri3=36000
numLeavesMLLT=2500
numGaussMLLT=36000
numLeavesSAT=2500
numGaussSAT=36000
numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000
# Lexicon and Language Model parameters
# The OOV symbol is given twice: once on its own and once embedded in the
# flag string; keep the two in sync when changing it.
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
# Feature switches: use_ffv is explicitly turned off and pitch is turned on.
# NOTE(review): "ffv" is presumably a second pitch-related feature type --
# confirm against the feature-extraction scripts.
use_ffv=false
use_pitch=true
# Scoring protocols (dummy GLM file to appease the scoring script)
# glmFile is left commented out here, so this file does not set it.
#glmFile=./conf/glm
lexicon_file=/export/babel/data/106-tagalog/release-babel106b-v0.2g-sub-train/conversational/reference_materials/lexicon.sub-train.txt
# Keyword search settings.
# NOTE(review): duptime is presumably a time window in seconds used when
# merging duplicate keyword hits -- confirm against the search scripts.
duptime=0.5
# Case-insensitive handling is enabled for keyword search in this config.
case_insensitive=true