зеркало из https://github.com/mozilla/kaldi.git
sandbox/language_id: Fixed a bug causing malformed wav.scp files
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3753 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
9f4d83f00e
Коммит
06f761d922
|
@@ -50,7 +50,10 @@ while($line = <LANGS>) {
|
|||
$channel = substr($channel, 0, 1);
|
||||
|
||||
$wav = `find $db_dir -name "$seg_id*"`;
|
||||
|
||||
chomp($wav);
|
||||
if ($wav eq "") {
|
||||
next;
|
||||
}
|
||||
# Small adjustments needed to language format.
|
||||
if ($lang eq "mandarin") {
|
||||
$lang = "chinese.mandarin.mainland";
|
||||
|
@@ -70,8 +73,7 @@ while($line = <LANGS>) {
|
|||
next;
|
||||
}
|
||||
$uttId = $num_lang{$abbr_lang{$lang}}."_".$seg_id."_".$conv_id."_".$channel;
|
||||
|
||||
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav"." |\n";
|
||||
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav \|\n";
|
||||
print UTT2SPK "$uttId $uttId\n";
|
||||
print UTT2LANG "$uttId $lang\n";
|
||||
print SPK2GEN "$uttId $gender\n";
|
||||
|
@@ -135,8 +137,7 @@ for $set ("lid96d1", "lid96e1") {
|
|||
|
||||
$wav = "$db_dir/$duration/$seg_id.sph";
|
||||
$uttId = $num_lang{$abbr_lang{$lang}}."_".$seg_id."_".$set."_".$duration;
|
||||
|
||||
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav"." |\n";
|
||||
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav \|\n";
|
||||
print UTT2SPK "$uttId $uttId\n";
|
||||
print UTT2LANG "$uttId $lang\n";
|
||||
# Gender information is absent here, defaulting to male
|
||||
|
|
|
@@ -100,7 +100,10 @@ while($line = <KEY>) {
|
|||
($seg_id, $lang, $dialect, $conv_id, $channel,
|
||||
$cut, $dur, $corp, $gender, $loc, $alt_lang) = split(" ", $line);
|
||||
$wav = `find $db_dir -name "$seg_id*"`;
|
||||
|
||||
chomp($wav);
|
||||
if ($wav eq "") {
|
||||
next;
|
||||
}
|
||||
$lang = lc $lang;
|
||||
$dialect = lc $dialect;
|
||||
if ($dialect eq "na") {
|
||||
|
|
Загрузка…
Ссылка в новой задаче