sandbox/lid: data-prep script and run.sh fixes.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3756 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2014-03-10 01:53:55 +00:00
Родитель c52a713602
Коммит 18e1be0067
3 изменённых файлов: 11 добавлений и 5 удалений

Просмотреть файл

@@ -50,7 +50,7 @@ while($line = <DB>) {
} }
$uttId = "lre03_${seg_id}"; $uttId = "lre03_${seg_id}";
print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav"." |\n"; print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav |\n";
print UTT2SPK "$uttId $uttId\n"; print UTT2SPK "$uttId $uttId\n";
print UTT2LANG "$uttId $lang\n"; print UTT2LANG "$uttId $lang\n";
print SPK2GEN "$uttId $gender\n"; print SPK2GEN "$uttId $gender\n";
@@ -90,7 +90,7 @@ for $set ("lid96d1", "lid96e1") {
$wav = "$data_dir/$duration/$seg_id.sph"; $wav = "$data_dir/$duration/$seg_id.sph";
$uttId = "${set}_${seg_id}"; $uttId = "${set}_${seg_id}";
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav"." |\n"; print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav |\n";
print UTT2SPK "$uttId $uttId\n"; print UTT2SPK "$uttId $uttId\n";
print UTT2LANG "$uttId $lang\n"; print UTT2LANG "$uttId $lang\n";
# Gender information is absent here, not outputting spk2gender file. # Gender information is absent here, not outputting spk2gender file.

Просмотреть файл

@@ -33,7 +33,7 @@ while($line = <KEY>) {
chomp($line); chomp($line);
# If the line isn't a comment # If the line isn't a comment
if (index($line, "#") == -1) { if (index($line, "#") == -1) {
($fi, $lang, $conv_id, $chan, $test_cut) = split(" ", $line); ($fi, $lang, $conv_id, $channel, $test_cut) = split(" ", $line);
# Verify that we have only Indian English. # Verify that we have only Indian English.
if (not ($lang eq "IE")) { if (not ($lang eq "IE")) {
die "$db_ie contains non-Indian English utterances."; die "$db_ie contains non-Indian English utterances.";
@@ -43,7 +43,12 @@ while($line = <KEY>) {
# This part of the corpus is only english.indian. # This part of the corpus is only english.indian.
$uttId = "lid05d1_$utt"; $uttId = "lid05d1_$utt";
$wav = $db_ie . $fi; $wav = $db_ie . $fi;
print WAV "$uttId"," sph2pipe -f wav -p -c $chan $wav |\n"; if (! -f $wav) {
print STDERR "No such file $wav (skipping)\n";
next;
}
$channel =~ tr/AB/12/;
print WAV "$uttId"," sph2pipe -f wav -p -c $channel $wav |\n";
print UTT2SPK "$uttId $uttId\n"; print UTT2SPK "$uttId $uttId\n";
print UTT2LANG "$uttId english.indian\n"; print UTT2LANG "$uttId english.indian\n";
} }
@@ -101,6 +106,7 @@ while($line = <KEY>) {
} }
$uttId = "lid05e1_".$seg_id; $uttId = "lid05e1_".$seg_id;
$channel =~ tr/AB/12/;
print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav |\n"; print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav |\n";
print UTT2SPK "$uttId $uttId\n"; print UTT2SPK "$uttId $uttId\n";

Просмотреть файл

@@ -51,7 +51,7 @@ rm foo
## HERE ## HERE
## ##
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \ steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 100 --cmd "$train_cmd" \
data/train exp/make_mfcc $mfccdir data/train exp/make_mfcc $mfccdir
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \ steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
data/lre07 exp/make_mfcc $mfccdir data/lre07 exp/make_mfcc $mfccdir