зеркало из https://github.com/mozilla/kaldi.git
sandbox/language_id: code and script updates for evaluating the model.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@3639 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
45bb9d0168
Коммит
c437252251
|
@ -29,9 +29,25 @@ posterior_output=posteriors
|
|||
scores=posteriors
|
||||
|
||||
classes=exp/ivectors_train/trials
|
||||
utils/utt2lang_to_utt2langint.py exp/ivectors_train/languages.txt \
|
||||
data/train/utt2lang $trials
|
||||
|
||||
logistic-regression-eval $model scp:$train_ivectors $posterior_output 2>$log
|
||||
logistic-regression-eval $model ark:$trials scp:$train_ivectors $scores 2>$log
|
||||
trials="utils/sym2int.pl -f 2 exp/ivectors_train/languages.txt data/train/utt2lang|"
|
||||
scores="|utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_train/train_scores"
|
||||
|
||||
logistic-regression-eval $model scp:$train_ivectors ark,t:exp/ivectors_train/posteriors
|
||||
logistic-regression-eval $model "ark:$trials" scp:$train_ivectors "$scores"
|
||||
|
||||
logistic-regression-eval $model scp:$train_ivectors ark,t:- | \
|
||||
awk '{max=$3; argmax=3; for(f=3;f<NF;f++) { if ($f>max) { max=$f; argmax=f; }} print $1, (argmax - 3); }' | \
|
||||
utils/int2sym.pl -f 2 exp/ivectors_train/languages.txt >exp/ivectors_train/train_output
|
||||
|
||||
# note: we treat the language as a sentence; it happens that the WER/SER
|
||||
# corresponds to the recognition error rate.
|
||||
compute-wer --text ark:data/train/utt2lang ark:exp/ivectors_train/train_output
|
||||
|
||||
# It perfectly classifies the training data:
|
||||
#%WER 0.00 [ 0 / 10173, 0 ins, 0 del, 0 sub ]
|
||||
#%SER 0.00 [ 0 / 10173 ]
|
||||
#Scored 10173 sentences, 0 not present in hyp.
|
||||
|
||||
|
||||
# someone needs to extend this to run on the dev data.
|
||||
|
|
|
@ -24,16 +24,17 @@
|
|||
|
||||
using namespace kaldi;
|
||||
|
||||
void posteriors(ParseOptions &po, const LogisticRegressionConfig &config) {
|
||||
int ComputePosteriors(ParseOptions &po, const LogisticRegressionConfig &config) {
|
||||
std::string model = po.GetArg(1),
|
||||
vector_rspecifier = po.GetArg(2),
|
||||
posteriors_out = po.GetArg(3);
|
||||
|
||||
LogisticRegression classifier = LogisticRegression();
|
||||
vector_rspecifier = po.GetArg(2),
|
||||
posteriors_wspecifier = po.GetArg(3);
|
||||
|
||||
LogisticRegression classifier;
|
||||
ReadKaldiObject(model, &classifier);
|
||||
|
||||
std::vector<Vector<BaseFloat> > vectors;
|
||||
SequentialBaseFloatVectorReader vector_reader(vector_rspecifier);
|
||||
BaseFloatVectorWriter posterior_writer(posteriors_wspecifier);
|
||||
std::vector<std::string> utt_list;
|
||||
int32 num_utt_done = 0;
|
||||
|
||||
|
@ -45,6 +46,11 @@ void posteriors(ParseOptions &po, const LogisticRegressionConfig &config) {
|
|||
num_utt_done++;
|
||||
}
|
||||
|
||||
if (vectors.empty()) {
|
||||
KALDI_WARN << "Read no input";
|
||||
return 1;
|
||||
}
|
||||
|
||||
Matrix<double> xs(vectors.size(), vectors[0].Dim());
|
||||
for (int i = 0; i < vectors.size(); i++) {
|
||||
xs.Row(i).CopyFromVec(vectors[i]);
|
||||
|
@ -52,19 +58,20 @@ void posteriors(ParseOptions &po, const LogisticRegressionConfig &config) {
|
|||
|
||||
Matrix<double> posteriors;
|
||||
classifier.GetPosteriors(xs, &posteriors);
|
||||
std::ofstream posteriors_xwstream(posteriors_out.c_str(), std::ios::out);
|
||||
|
||||
KALDI_LOG << "Calculated posteriors for " << num_utt_done << " vectors.";
|
||||
for (int i = 0; i < posteriors.NumRows(); i++) {
|
||||
posteriors_xwstream << utt_list[i] << " " << posteriors.Row(i);
|
||||
}
|
||||
posteriors_xwstream.close();
|
||||
Vector<BaseFloat> row(posteriors.Row(i));
|
||||
posterior_writer.Write(utt_list[i], row);
|
||||
}
|
||||
return (num_utt_done == 0 ? 1 : 0);
|
||||
}
|
||||
|
||||
void scores(ParseOptions &po, const LogisticRegressionConfig &config) {
|
||||
int32 ComputeScores(ParseOptions &po, const LogisticRegressionConfig &config) {
|
||||
std::string model_rspecifier = po.GetArg(1),
|
||||
trials_rspecifier = po.GetArg(2),
|
||||
vector_rspecifier = po.GetArg(3),
|
||||
scores_out = po.GetArg(4);
|
||||
trials_rspecifier = po.GetArg(2),
|
||||
vector_rspecifier = po.GetArg(3),
|
||||
scores_out = po.GetArg(4);
|
||||
|
||||
SequentialInt32Reader class_reader(trials_rspecifier);
|
||||
LogisticRegression classifier = LogisticRegression();
|
||||
|
@ -91,6 +98,11 @@ void scores(ParseOptions &po, const LogisticRegressionConfig &config) {
|
|||
}
|
||||
}
|
||||
|
||||
if (vectors.empty()) {
|
||||
KALDI_WARN << "Read no input";
|
||||
return 1;
|
||||
}
|
||||
|
||||
Matrix<double> xs(vectors.size(), vectors[0].Dim());
|
||||
for (int i = 0; i < vectors.size(); i++) {
|
||||
xs.Row(i).CopyFromVec(vectors[i]);
|
||||
|
@ -98,15 +110,17 @@ void scores(ParseOptions &po, const LogisticRegressionConfig &config) {
|
|||
|
||||
Matrix<double> posteriors;
|
||||
classifier.GetPosteriors(xs, &posteriors);
|
||||
|
||||
bool binary = false;
|
||||
Output ko(scores_out.c_str(), binary);
|
||||
|
||||
std::ofstream scores_xwstream(scores_out.c_str(), std::ios::out);
|
||||
for (int i = 0; i < ys.size(); i++) {
|
||||
scores_xwstream << utt_list[i] << " " << ys[i] << " " << posteriors(i, ys[i]) << std::endl;
|
||||
ko.Stream() << utt_list[i] << " " << ys[i] << " " << posteriors(i, ys[i]) << std::endl;
|
||||
}
|
||||
scores_xwstream.close();
|
||||
KALDI_LOG << "Calculated scores for" << num_utt_done
|
||||
KALDI_LOG << "Calculated scores for " << num_utt_done
|
||||
<< " vectors with "
|
||||
<< num_utt_err << " missing. ";
|
||||
return (num_utt_done == 0 ? 1 : 0);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
@ -116,10 +130,10 @@ int main(int argc, char *argv[]) {
|
|||
const char *usage =
|
||||
"Evaluates a model on input vectors and outputs either\n"
|
||||
"posterior probability or scores.\n"
|
||||
"Usage1: logistic-regression-eval <model> <input-vectors>\n"
|
||||
" <output-posteriors>\n"
|
||||
"Usage2: logistic-regression-eval <model> <trials> <input-vectors>\n"
|
||||
" <output-scores>\n";
|
||||
"Usage1: logistic-regression-eval <model> <input-vectors-rspecifier>\n"
|
||||
" <output-posteriors-wspecifier>\n"
|
||||
"Usage2: logistic-regression-eval <model> <trials-file> <input-vectors-rspecifier>\n"
|
||||
" <output-scores-file>\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
|
@ -134,9 +148,9 @@ int main(int argc, char *argv[]) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
(po.NumArgs() == 4) ? scores(po, config) : posteriors(po, config);
|
||||
|
||||
return 0;
|
||||
return (po.NumArgs() == 4) ?
|
||||
ComputeScores(po, config) :
|
||||
ComputePosteriors(po, config);
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
|
|
Загрузка…
Ссылка в новой задаче