Mirror of https://github.com/mozilla/kaldi.git
sandbox/lid: various script fixes and updates; improving speed of iVector-extractor model loading by parallelizing derived-variables computation.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3759 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Parent: a1a368dc83
Commit: 5c5a4e2f5a
@@ -75,6 +75,7 @@ if [ $stage -le 1 ]; then
    if ($len <= $max_utt_len) {
      print SEGMENTS "${utt}-1 ${utt} 0 -1\n";
      print UTT2SPK "${utt}-1 $speaker\n";
      print UTT2LANG "${utt}-1 $language\n";
    } else {
      # We will now allow split length to exceed max_utt_len.
      $num_split = int(($len + 0.999*$max_utt_len) / $max_utt_len);

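A side note on the $num_split formula in the hunk above: int(($len + 0.999*$max_utt_len) / $max_utt_len) is essentially a ceiling division of $len by $max_utt_len, except that when the length exceeds a multiple of max_utt_len by less than 0.1% of max_utt_len no extra split is added, which is why the comment says a split is now allowed to slightly exceed max_utt_len. A small stand-alone C++ sketch (illustrative only; the max_utt_len value below is made up) reproduces the behaviour:

#include <cstdio>

// Reproduces num_split = int((len + 0.999 * max_utt_len) / max_utt_len):
// roughly ceil(len / max_utt_len), but a tiny overshoot past a multiple of
// max_utt_len is absorbed instead of forcing one more (very short) split.
int main() {
  double max_utt_len = 30.0;  // assumed example value, not from the script
  double lens[] = {25.0, 30.0, 30.02, 30.5, 65.0, 91.0};
  for (double len : lens) {
    int num_split = static_cast<int>((len + 0.999 * max_utt_len) / max_utt_len);
    std::printf("len=%.2f -> num_split=%d (each split ~%.2f)\n",
                len, num_split, len / num_split);
  }
  return 0;
}
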
@@ -63,12 +63,6 @@ lid/compute_vad_decision.sh --nj 4 --cmd "$train_cmd" data/train \
lid/compute_vad_decision.sh --nj 4 --cmd "$train_cmd" data/lre07 \
  exp/make_vad $vaddir

# Use 4k of the 14k utterances for testing, but make sure the speakers do not
# overlap with the rest of the data, which will be used for training.
#utils/subset_data_dir.sh --speakers data/all 4000 data/lre07
#utils/filter_scp.pl --exclude data/lre07/spk2utt < data/all/spk2utt | awk '{print $1}' > foo
#utils/subset_data_dir.sh --spk-list foo data/all data/train

utils/subset_data_dir.sh data/train 5000 data/train_5k
utils/subset_data_dir.sh data/train 10000 data/train_10k

@@ -82,7 +76,6 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd" data/train_10k \
lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd" data/train \
  exp/full_ubm_2048_10k exp/full_ubm_2048

lid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \
  --num-iters 5 exp/full_ubm_2048/final.ubm data/train \
  exp/extractor_2048

@@ -91,6 +84,4 @@ lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \
  exp/extractor_2048 data/train exp/ivectors_train

lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \
  exp/extractor_2048 data/lre07 exp/ivectors_test
  exp/extractor_2048 data/lre07 exp/ivectors_lre07

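The commented-out subset_data_dir.sh / filter_scp.pl lines above document how the 4k-utterance lre07 test set was carved out so that its speakers do not overlap with the training speakers. A toy C++ sketch of that speaker-disjoint split (a hypothetical helper for illustration, not one of the utils/ scripts):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Whole speakers are assigned to the test set until roughly target_test_utts
// utterances have been collected; everything else goes to training, so no
// speaker appears on both sides (the property the comment above asks for).
struct Split { std::vector<std::string> test_utts, train_utts; };

Split SpeakerDisjointSplit(
    const std::map<std::string, std::vector<std::string> > &spk2utt,
    size_t target_test_utts) {
  Split s;
  for (const auto &p : spk2utt) {
    std::vector<std::string> &dst =
        (s.test_utts.size() < target_test_utts) ? s.test_utts : s.train_utts;
    dst.insert(dst.end(), p.second.begin(), p.second.end());
  }
  return s;
}

int main() {
  std::map<std::string, std::vector<std::string> > spk2utt = {
      {"spk1", {"utt1", "utt2"}}, {"spk2", {"utt3"}}, {"spk3", {"utt4"}}};
  Split s = SpeakerDisjointSplit(spk2utt, 2);  // ask for ~2 test utterances
  std::printf("%zu test utts, %zu train utts\n",
              s.test_utts.size(), s.train_utts.size());
  return 0;
}
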
@@ -8,15 +8,13 @@
. cmd.sh
. path.sh
set -e
mfccdir=`pwd`/mfcc
vaddir=`pwd`/mfcc

config=conf/logistic-regression.conf

awk '{print $2}' <(utils/remove_dialect.pl data/train/utt2lang) | sort -u | \
  awk '{print $1, NR-1}' > exp/ivectors_train/languages.txt

log=exp/ivectors_train/log/logistic_regression.log

model=exp/ivectors_train/logistic_regression
model_rebalanced=exp/ivectors_train/logistic_regression_rebalanced

@@ -40,12 +38,12 @@ utils/balance_priors_to_test.pl \
  exp/ivectors_train/priors.vec

logistic-regression-train --config=$config scp:$train_ivectors \
  "$classes" $model 2>$log
  "$classes" $model \
  2>exp/ivectors_train/log/logistic_regression.log

( logistic-regression-train --config=$config scp:$train_ivectors \
  "$classes" - | \
  logistic-regression-copy --scale-priors=exp/ivectors_train/priors.vec - \
  $model_rebalanced ) 2>$log

logistic-regression-copy --scale-priors=exp/ivectors_train/priors.vec \
  $model $model_rebalanced

trials="utils/remove_dialect.pl data/train/utt2lang \
  | utils/sym2int.pl -f 2 exp/ivectors_train/languages.txt -|"

@@ -57,7 +55,7 @@ logistic-regression-eval $model scp:$train_ivectors \

logistic-regression-eval $model "ark:$trials" scp:$train_ivectors "$scores"

logistic-regression-eval $model scp:$train_ivectors ark,t:- | \
cat exp/ivectors_train/posteriors | \
  awk '{max=$3; argmax=3; for(f=3;f<NF;f++) { if ($f>max)
          { max=$f; argmax=f; }}
        print $1, (argmax - 3); }' | \

@@ -66,29 +64,28 @@ logistic-regression-eval $model scp:$train_ivectors ark,t:- | \

# note: we treat the language as a sentence; it happens that the WER/SER
# corresponds to the recognition error rate.
compute-wer --text ark:<(utils/remove_dialect.pl data/train/utt2lang) \
compute-wer --mode=present --text ark:<(utils/remove_dialect.pl data/train/utt2lang) \
  ark:exp/ivectors_train/output

# It perfectly classifies the training data:
#%WER 0.00 [ 0 / 10173, 0 ins, 0 del, 0 sub ]
#%SER 0.00 [ 0 / 10173 ]
#Scored 10173 sentences, 0 not present in hyp.

#%WER 4.68 [ 3355 / 71668, 0 ins, 0 del, 3355 sub ] [PARTIAL]
#%SER 4.68 [ 3355 / 71668 ]
#Scored 71668 sentences, 16 not present in hyp.

logistic-regression-eval $model_rebalanced \
  scp:exp/ivectors_test/ivector.scp ark,t:- | \
  scp:exp/ivectors_lre07/ivector.scp ark,t:- | \
  awk '{max=$3; argmax=3; for(f=3;f<NF;f++) { if ($f>max)
          { max=$f; argmax=f; }}
        print $1, (argmax - 3); }' | \
  utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_test/output
  utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_lre07/output

# someone needs to extend this to run on the dev data.

compute-wer --text ark:<(utils/remove_dialect.pl data/lre07/utt2lang)\
  ark:exp/ivectors_test/output
# compute-wer --text ark:/dev/fd/63 ark:exp/lre07/output
# %WER 58.83 [ 3958 / 7527, 0 ins, 0 del, 3958 sub ]
# %SER 58.83 [ 3958 / 7527 ]
compute-wer --text ark:<(utils/remove_dialect.pl data/lre07/utt2lang) \
  ark:exp/ivectors_lre07/output
> compute-wer --text ark:/dev/fd/63 ark:exp/ivectors_lre07/output
# %WER 34.34 [ 2585 / 7527, 0 ins, 0 del, 2585 sub ]
# %SER 34.34 [ 2585 / 7527 ]
# Scored 7527 sentences, 0 not present in hyp.

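Two remarks on the pipeline above, since the awk one-liner is fairly dense. logistic-regression-eval with ark,t output writes one text-form vector per utterance, shaped like "utt  [ s0 s1 ... ]"; field 2 is "[" and the last field is "]", so the loop over fields 3..NF-1 finds the highest score and "argmax - 3" turns its position into the zero-based language index used in languages.txt. compute-wer is then run on these one-word "sentences" (the note in the script above), so %WER and %SER both equal the language classification error rate. A stand-alone C++ sketch of the same argmax step, assuming that line format:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Reads lines of the form "utt  [ s0 s1 ... ]" from stdin and prints
// "utt argmax", with argmax zero-based, mirroring the awk one-liner above.
int main() {
  std::string line;
  while (std::getline(std::cin, line)) {
    std::istringstream iss(line);
    std::string utt, tok;
    if (!(iss >> utt >> tok) || tok != "[") continue;  // skip malformed lines
    std::vector<double> scores;
    while (iss >> tok && tok != "]") scores.push_back(std::stod(tok));
    if (scores.empty()) continue;
    size_t argmax = 0;
    for (size_t i = 1; i < scores.size(); i++)
      if (scores[i] > scores[argmax]) argmax = i;
    std::cout << utt << ' ' << argmax << '\n';  // index into languages.txt
  }
  return 0;
}
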
@@ -77,13 +77,13 @@ else
  echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance."
  split_scps=""
  for ((n=1; n<=nj; n++)); do
    split_scps="$split_scps $logdir/wav.$n.scp"
    split_scps="$split_scps $logdir/wav_${name}.$n.scp"
  done

  utils/split_scp.pl $scp $split_scps || exit 1;

  $cmd JOB=1:$nj $logdir/make_mfcc_${name}.JOB.log \
    compute-mfcc-feats --verbose=2 --config=$mfcc_config scp:$logdir/wav.JOB.scp ark:- \| \
    compute-mfcc-feats --verbose=2 --config=$mfcc_config scp:$logdir/wav_${name}.JOB.scp ark:- \| \
    copy-feats --compress=$compress ark:- \
      ark,scp:$mfccdir/raw_mfcc_$name.JOB.ark,$mfccdir/raw_mfcc_$name.JOB.scp \
    || exit 1;

@@ -102,7 +102,7 @@ for ((n=1; n<=nj; n++)); do
  cat $mfccdir/raw_mfcc_$name.$n.scp || exit 1;
done > $data/feats.scp

rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null
rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null

nf=`cat $data/feats.scp | wc -l`
nu=`cat $data/utt2spk | wc -l`

@@ -228,12 +228,16 @@ if [ -f $data/spk2gender ]; then
  fi
fi

if [ -f $data/vad.scp ]; then
  check_sorted_and_uniq $data/vad.scp
  if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \
    <( awk '{print $1}' $data/vad.scp ); then
    echo "$0: error: in $data, vad.scp and utt2spk do not have identical utterance-id list"
# check some optionally-required things
for f in vad.scp utt2lang; do
  if [ -f $data/$f ]; then
    check_sorted_and_uniq $data/$f
    if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \
      <( awk '{print $1}' $data/$f ); then
      echo "$0: error: in $data, $f and utt2spk do not have identical utterance-id list"
      exit 1;
    fi
  fi
fi
done

echo "Successfully validated data-directory $data"

@@ -197,6 +197,17 @@ IvectorExtractor::IvectorExtractor(
  ComputeDerivedVars();
}

class IvectorExtractorComputeDerivedVarsClass {
 public:
  IvectorExtractorComputeDerivedVarsClass(IvectorExtractor *extractor,
                                          int32 i):
      extractor_(extractor), i_(i) { }
  void operator () () { extractor_->ComputeDerivedVars(i_); }
 private:
  IvectorExtractor *extractor_;
  int32 i_;
};

void IvectorExtractor::ComputeDerivedVars() {
  KALDI_LOG << "Computing derived variables for iVector extractor";
  gconsts_.Resize(NumGauss());

@@ -206,17 +217,32 @@ void IvectorExtractor::ComputeDerivedVars() {
    // the gconsts don't contain any weight-related terms.
  }
  U_.Resize(NumGauss(), IvectorDim() * (IvectorDim() + 1) / 2);
  SpMatrix<double> temp_U(IvectorDim());
  for (int32 i = 0; i < NumGauss(); i++) {
    // temp_U = M_i^T Sigma_i^{-1} M_i
    temp_U.AddMat2Sp(1.0, M_[i], kTrans, Sigma_inv_[i], 0.0);
    SubVector<double> temp_U_vec(temp_U.Data(),
                                 IvectorDim() * (IvectorDim() + 1) / 2);
    U_.Row(i).CopyFromVec(temp_U_vec);

  // Note, we could have used RunMultiThreaded for this and similar tasks we
  // have here, but we found that we don't get as complete CPU utilization as we
  // could because some tasks finish before others.
  {
    TaskSequencerConfig sequencer_opts;
    sequencer_opts.num_threads = g_num_threads;
    TaskSequencer<IvectorExtractorComputeDerivedVarsClass> sequencer(
        sequencer_opts);
    for (int32 i = 0; i < NumGauss(); i++)
      sequencer.Run(new IvectorExtractorComputeDerivedVarsClass(this, i));
  }
  KALDI_LOG << "Done.";
}

void IvectorExtractor::ComputeDerivedVars(int32 i) {
  SpMatrix<double> temp_U(IvectorDim());
  // temp_U = M_i^T Sigma_i^{-1} M_i
  temp_U.AddMat2Sp(1.0, M_[i], kTrans, Sigma_inv_[i], 0.0);
  SubVector<double> temp_U_vec(temp_U.Data(),
                               IvectorDim() * (IvectorDim() + 1) / 2);
  U_.Row(i).CopyFromVec(temp_U_vec);
}

void IvectorExtractor::GetIvectorDistWeight(
    const IvectorExtractorUtteranceStats &utt_stats,

@@ -986,7 +1012,7 @@ double IvectorStats::UpdateProjections(
  double tot_impr = 0.0;
  {
    TaskSequencerConfig sequencer_opts;
    sequencer_opts.num_threads = opts.num_threads;
    sequencer_opts.num_threads = g_num_threads;
    TaskSequencer<IvectorExtractorUpdateProjectionClass> sequencer(
        sequencer_opts);
    for (int32 i = 0; i < I; i++)

@@ -1149,7 +1175,7 @@ double IvectorStats::UpdateWeights(
  double tot_impr = 0.0;
  {
    TaskSequencerConfig sequencer_opts;
    sequencer_opts.num_threads = opts.num_threads;
    sequencer_opts.num_threads = g_num_threads;
    TaskSequencer<IvectorExtractorUpdateWeightClass> sequencer(
        sequencer_opts);
    for (int32 i = 0; i < I; i++)

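The comment added in ComputeDerivedVars() above explains the scheduling choice: RunMultiThreaded hands each thread one fixed block of indices, so threads whose blocks happen to be cheap finish early and sit idle, while TaskSequencer keeps handing out small per-Gaussian tasks as threads free up (it also takes care to finish tasks in submission order, which this sketch ignores). A standard-library-only sketch of the dynamic variant, with per_gaussian_work() standing in for ComputeDerivedVars(i); this illustrates the idea and is not Kaldi code:

#include <atomic>
#include <thread>
#include <vector>

// Placeholder for ComputeDerivedVars(i); its cost is made to vary by index,
// which is exactly the situation where static partitioning loses.
static void per_gaussian_work(int i) {
  volatile double x = 0.0;
  for (int k = 0; k < (i % 7 + 1) * 100000; k++) x = x + k * 1e-9;
}

// Dynamic scheduling: every idle thread pulls the next unprocessed index from
// a shared atomic counter, so no thread waits while work remains.
static void run_dynamic(int num_tasks, int num_threads) {
  std::atomic<int> next(0);
  std::vector<std::thread> workers;
  for (int t = 0; t < num_threads; t++)
    workers.emplace_back([&]() {
      for (int i = next++; i < num_tasks; i = next++) per_gaussian_work(i);
    });
  for (auto &w : workers) w.join();
}

int main() {
  run_dynamic(2048, 8);  // e.g. 2048 Gaussians on 8 threads
  return 0;
}
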
@@ -57,6 +57,7 @@ struct IvectorEstimationOptions {

class IvectorExtractor;
class IvectorExtractorComputeDerivedVarsClass;

/// These are the stats for a particular utterance, i.e. the sufficient stats
/// for estimating an iVector (if need_2nd_order_stats == true, we can also

@@ -229,7 +230,9 @@ class IvectorExtractor {
  // because they do what we want.
 protected:
  void ComputeDerivedVars();
  void ComputeDerivedVars(int32 i);
  friend class IvectorExtractorComputeDerivedVarsClass;

  // Imagine we'll project the iVectors with transformation T, so apply T^{-1}
  // where necessary to keep the model equivalent. Used to keep unit variance
  // (like prior re-estimation).

@@ -311,8 +314,7 @@ struct IvectorExtractorEstimationOptions {
  double gaussian_min_count;
  int32 num_threads;
  IvectorExtractorEstimationOptions(): variance_floor_factor(0.1),
                                       gaussian_min_count(100.0),
                                       num_threads(1) { }
                                       gaussian_min_count(100.0) { }
  void Register(OptionsItf *po) {
    po->Register("variance-floor-factor", &variance_floor_factor,
                 "Factor that determines variance flooring (we floor each covar "

@@ -320,8 +322,6 @@ struct IvectorExtractorEstimationOptions {
    po->Register("gaussian-min-count", &gaussian_min_count,
                 "Minimum total count per Gaussian, below which we refuse to "
                 "update any associated parameters.");
    po->Register("num-threads", &num_threads,
                 "Number of threads used in iVector estimation program");
  }
};

@@ -131,6 +131,9 @@ int main(int argc, char *argv[]) {
        posteriors_rspecifier = po.GetArg(3),
        ivectors_wspecifier = po.GetArg(4);

    // g_num_threads affects how ComputeDerivedVars is called when we read the
    // extractor.
    g_num_threads = sequencer_config.num_threads;
    IvectorExtractor extractor;
    ReadKaldiObject(ivector_extractor_rxfilename, &extractor);

@@ -102,6 +102,12 @@ int main(int argc, char *argv[]) {
    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
    RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);

    // This is a bit of a mess... the code that reads in the extractor calls
    // ComputeDerivedVars, and it can do this multi-threaded, controlled by
    // g_num_threads. So if the user specified the --num-threads option, which
    // goes to sequencer_opts in this case, copy it to g_num_threads.
    g_num_threads = sequencer_opts.num_threads;

    IvectorExtractor extractor;
    ReadKaldiObject(ivector_extractor_rxfilename, &extractor);

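The two hunks above make the same point in their comments: reading the extractor ends in ComputeDerivedVars(), which sizes its worker pool from the global g_num_threads, so the user's --num-threads value has to be copied into that global before ReadKaldiObject() is called. A minimal stand-alone illustration of that ordering requirement (hypothetical types, not the Kaldi classes):

#include <cstdio>

static int g_num_threads = 1;  // stands in for kaldi::g_num_threads

struct Extractor {
  void Read() {
    // In Kaldi the read path ends in ComputeDerivedVars(), whose
    // TaskSequencer is sized from g_num_threads at this point.
    std::printf("computing derived vars with %d thread(s)\n", g_num_threads);
  }
};

int main() {
  int num_threads_opt = 4;          // pretend this came from --num-threads
  g_num_threads = num_threads_opt;  // must happen before Read()
  Extractor extractor;
  extractor.Read();
  return 0;
}
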
@@ -19,7 +19,7 @@

#include "util/common-utils.h"
#include "ivector/ivector-extractor.h"
#include "thread/kaldi-thread.h"

int main(int argc, char *argv[]) {
  try {

@@ -36,6 +36,9 @@ int main(int argc, char *argv[]) {

    kaldi::ParseOptions po(usage);
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("num-threads", &g_num_threads,
                "Number of threads used in update");

    update_opts.Register(&po);

    po.Read(argc, argv);

@@ -27,13 +27,15 @@
// that you have some range of integers, e.g. A ... B-1 (with B > A), and some
// function call that takes a range of integers, and you partition these up into
// a number of blocks.
// Also see kaldi-task-sequence.h which is suitable for parallelizing the processing
// of tasks coming in sequentially from somewhere.

// TODO: if needed, provide a workaround for Windows and other
// non-POSIX-compliant systems, possibly one that does not actually do
// multi-threading.


// Description of MultiThreadPool and it's usage:
// Description of MultiThreadPool and its usage:
//
// Usage of the RunMultiThreadedPersistent is the same as the usage of
// RunMultiThreaded, except that the object provided must inherit MultiThreadable

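For readers unfamiliar with the header being edited: the comment above describes the block-partitioning pattern behind RunMultiThreaded, where a range A..B-1 is cut into contiguous blocks and each thread takes one block; kaldi-task-sequence.h covers the complementary case of tasks arriving one by one. A standard-library sketch of the block-partitioning idea (not the actual kaldi-thread.h interface):

#include <thread>
#include <vector>

// Cuts [A, B) into num_threads contiguous blocks and runs f(i) for every i,
// each thread handling one block -- the pattern the header comment describes.
template <typename F>
void RunOnBlocks(int A, int B, int num_threads, F f) {
  std::vector<std::thread> threads;
  int n = B - A;
  for (int t = 0; t < num_threads; t++) {
    int begin = A + (n * t) / num_threads;
    int end = A + (n * (t + 1)) / num_threads;
    threads.emplace_back([=]() {
      for (int i = begin; i < end; i++) f(i);  // this thread's block
    });
  }
  for (auto &th : threads) th.join();
}

int main() {
  std::vector<int> out(100, 0);
  RunOnBlocks(0, 100, 4, [&out](int i) { out[i] = i * i; });
  return 0;
}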