From 18e1be0067bca38aacd31b1fb0e8addbde5bf629 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Mon, 10 Mar 2014 01:53:55 +0000 Subject: [PATCH] sandbox/lid: data-prep script and run.sh fixes. git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3756 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8 --- egs/lre/v1/local/make_lre03.pl | 4 ++-- egs/lre/v1/local/make_lre05.pl | 10 ++++++++-- egs/lre/v1/run.sh | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/egs/lre/v1/local/make_lre03.pl b/egs/lre/v1/local/make_lre03.pl index 50085d76b..b7b29f656 100755 --- a/egs/lre/v1/local/make_lre03.pl +++ b/egs/lre/v1/local/make_lre03.pl @@ -50,7 +50,7 @@ while($line = ) { } $uttId = "lre03_${seg_id}"; - print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav"." |\n"; + print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav |\n"; print UTT2SPK "$uttId $uttId\n"; print UTT2LANG "$uttId $lang\n"; print SPK2GEN "$uttId $gender\n"; @@ -90,7 +90,7 @@ for $set ("lid96d1", "lid96e1") { $wav = "$data_dir/$duration/$seg_id.sph"; $uttId = "${set}_${seg_id}"; - print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav"." |\n"; + print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav |\n"; print UTT2SPK "$uttId $uttId\n"; print UTT2LANG "$uttId $lang\n"; # Gender information is absent here, not outputting spk2gender file. diff --git a/egs/lre/v1/local/make_lre05.pl b/egs/lre/v1/local/make_lre05.pl index 650c63fbe..f77984c4d 100755 --- a/egs/lre/v1/local/make_lre05.pl +++ b/egs/lre/v1/local/make_lre05.pl @@ -33,7 +33,7 @@ while($line = ) { chomp($line); # If the line isn't a comment if (index($line, "#") == -1) { - ($fi, $lang, $conv_id, $chan, $test_cut) = split(" ", $line); + ($fi, $lang, $conv_id, $channel, $test_cut) = split(" ", $line); # Verify that we have only Indian English. if (not ($lang eq "IE")) { die "$db_ie contains non-Indian English utterances."; @@ -43,7 +43,12 @@ while($line = ) { # This part of the corpus is only english.indian. $uttId = "lid05d1_$utt"; $wav = $db_ie . $fi; - print WAV "$uttId"," sph2pipe -f wav -p -c $chan $wav |\n"; + if (! -f $wav) { + print STDERR "No such file $wav (skipping)\n"; + next; + } + $channel =~ tr/AB/12/; + print WAV "$uttId"," sph2pipe -f wav -p -c $channel $wav |\n"; print UTT2SPK "$uttId $uttId\n"; print UTT2LANG "$uttId english.indian\n"; } @@ -101,6 +106,7 @@ while($line = ) { } $uttId = "lid05e1_".$seg_id; + $channel =~ tr/AB/12/; print WAV "$uttId"," sph2pipe -f wav -p -c ${channel} $wav |\n"; print UTT2SPK "$uttId $uttId\n"; diff --git a/egs/lre/v1/run.sh b/egs/lre/v1/run.sh index 571242d54..df450f44b 100755 --- a/egs/lre/v1/run.sh +++ b/egs/lre/v1/run.sh @@ -51,7 +51,7 @@ rm foo ## HERE ## -steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \ +steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 100 --cmd "$train_cmd" \ data/train exp/make_mfcc $mfccdir steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \ data/lre07 exp/make_mfcc $mfccdir