sandbox/language_id: Fixed a bug causing malformed wav.scp files

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3753 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
David Snyder 2014-03-09 19:27:26 +00:00
Родитель 9f4d83f00e
Коммит 06f761d922
2 изменённых файлов: 10 добавлений и 6 удалений

Просмотреть файл

@@ -50,7 +50,10 @@ while($line = <LANGS>) {
$channel = substr($channel, 0, 1);
$wav = `find $db_dir -name "$seg_id*"`;
chomp($wav);
if ($wav eq "") {
next;
}
# Small adjustments needed to language format.
if ($lang eq "mandarin") {
$lang = "chinese.mandarin.mainland";
@@ -70,8 +73,7 @@ while($line = <LANGS>) {
next;
}
$uttId = $num_lang{$abbr_lang{$lang}}."_".$seg_id."_".$conv_id."_".$channel;
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav"." |\n";
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav \|\n";
print UTT2SPK "$uttId $uttId\n";
print UTT2LANG "$uttId $lang\n";
print SPK2GEN "$uttId $gender\n";
@@ -135,8 +137,7 @@ for $set ("lid96d1", "lid96e1") {
$wav = "$db_dir/$duration/$seg_id.sph";
$uttId = $num_lang{$abbr_lang{$lang}}."_".$seg_id."_".$set."_".$duration;
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav"." |\n";
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav \|\n";
print UTT2SPK "$uttId $uttId\n";
print UTT2LANG "$uttId $lang\n";
# Gender information is absent here, defaulting to male

Просмотреть файл

@@ -100,7 +100,10 @@ while($line = <KEY>) {
($seg_id, $lang, $dialect, $conv_id, $channel,
$cut, $dur, $corp, $gender, $loc, $alt_lang) = split(" ", $line);
$wav = `find $db_dir -name "$seg_id*"`;
chomp($wav);
if ($wav eq "") {
next;
}
$lang = lc $lang;
$dialect = lc $dialect;
if ($dialect eq "na") {