зеркало из https://github.com/mozilla/kaldi.git
sandbox/lid: data-prep script and run.sh fixes.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3756 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
c52a713602
Коммит
18e1be0067
|
@@ -50,7 +50,7 @@ while($line = <DB>) {
|
||||||
}
|
}
|
||||||
$uttId = "lre03_${seg_id}";
|
$uttId = "lre03_${seg_id}";
|
||||||
|
|
||||||
print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav"." |\n";
|
print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav |\n";
|
||||||
print UTT2SPK "$uttId $uttId\n";
|
print UTT2SPK "$uttId $uttId\n";
|
||||||
print UTT2LANG "$uttId $lang\n";
|
print UTT2LANG "$uttId $lang\n";
|
||||||
print SPK2GEN "$uttId $gender\n";
|
print SPK2GEN "$uttId $gender\n";
|
||||||
|
@@ -90,7 +90,7 @@ for $set ("lid96d1", "lid96e1") {
|
||||||
|
|
||||||
$wav = "$data_dir/$duration/$seg_id.sph";
|
$wav = "$data_dir/$duration/$seg_id.sph";
|
||||||
$uttId = "${set}_${seg_id}";
|
$uttId = "${set}_${seg_id}";
|
||||||
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav"." |\n";
|
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav |\n";
|
||||||
print UTT2SPK "$uttId $uttId\n";
|
print UTT2SPK "$uttId $uttId\n";
|
||||||
print UTT2LANG "$uttId $lang\n";
|
print UTT2LANG "$uttId $lang\n";
|
||||||
# Gender information is absent here, not outputting spk2gender file.
|
# Gender information is absent here, not outputting spk2gender file.
|
||||||
|
|
|
@@ -33,7 +33,7 @@ while($line = <KEY>) {
|
||||||
chomp($line);
|
chomp($line);
|
||||||
# If the line isn't a comment
|
# If the line isn't a comment
|
||||||
if (index($line, "#") == -1) {
|
if (index($line, "#") == -1) {
|
||||||
($fi, $lang, $conv_id, $chan, $test_cut) = split(" ", $line);
|
($fi, $lang, $conv_id, $channel, $test_cut) = split(" ", $line);
|
||||||
# Verify that we have only Indian English.
|
# Verify that we have only Indian English.
|
||||||
if (not ($lang eq "IE")) {
|
if (not ($lang eq "IE")) {
|
||||||
die "$db_ie contains non-Indian English utterances.";
|
die "$db_ie contains non-Indian English utterances.";
|
||||||
|
@@ -43,7 +43,12 @@ while($line = <KEY>) {
|
||||||
# This part of the corpus is only english.indian.
|
# This part of the corpus is only english.indian.
|
||||||
$uttId = "lid05d1_$utt";
|
$uttId = "lid05d1_$utt";
|
||||||
$wav = $db_ie . $fi;
|
$wav = $db_ie . $fi;
|
||||||
print WAV "$uttId"," sph2pipe -f wav -p -c $chan $wav |\n";
|
if (! -f $wav) {
|
||||||
|
print STDERR "No such file $wav (skipping)\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
$channel =~ tr/AB/12/;
|
||||||
|
print WAV "$uttId"," sph2pipe -f wav -p -c $channel $wav |\n";
|
||||||
print UTT2SPK "$uttId $uttId\n";
|
print UTT2SPK "$uttId $uttId\n";
|
||||||
print UTT2LANG "$uttId english.indian\n";
|
print UTT2LANG "$uttId english.indian\n";
|
||||||
}
|
}
|
||||||
|
@@ -101,6 +106,7 @@ while($line = <KEY>) {
|
||||||
}
|
}
|
||||||
|
|
||||||
$uttId = "lid05e1_".$seg_id;
|
$uttId = "lid05e1_".$seg_id;
|
||||||
|
$channel =~ tr/AB/12/;
|
||||||
|
|
||||||
print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav |\n";
|
print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav |\n";
|
||||||
print UTT2SPK "$uttId $uttId\n";
|
print UTT2SPK "$uttId $uttId\n";
|
||||||
|
|
|
@@ -51,7 +51,7 @@ rm foo
|
||||||
## HERE
|
## HERE
|
||||||
##
|
##
|
||||||
|
|
||||||
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
|
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 100 --cmd "$train_cmd" \
|
||||||
data/train exp/make_mfcc $mfccdir
|
data/train exp/make_mfcc $mfccdir
|
||||||
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
|
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
|
||||||
data/lre07 exp/make_mfcc $mfccdir
|
data/lre07 exp/make_mfcc $mfccdir
|
||||||
|
|
Загрузка…
Ссылка в новой задаче