зеркало из https://github.com/mozilla/kaldi.git
sandbox/language_id: Adding scripts to produce the LRE07 General Closed-Set Language Recognition eval. Also fixing a minor bug in run_logistic_regression.sh when rebalancing priors.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@4005 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
e5c630fd75
Коммит
55c24a43aa
|
@ -0,0 +1,92 @@
|
|||
#!/bin/bash
# Copyright 2014 David Snyder
# Apache 2.0.
#
# Calculates the 3s, 10s, and 30s error rates and C_avgs
# on the LRE07 General Language Recognition closed-set
# using the logistic regression model passed in as an argument.
# Detailed results such as the probability of misses for individual
# languages are computed in local/lre07_results.
#
# Usage: local/lre07_logistic_regression_eval.sh <logistic-regression-model>
#
# Reads:   exp/ivectors_lre07/ivector.scp, exp/ivectors_lre07/output,
#          exp/ivectors_train/languages.txt, data/lre07/utt2lang and the
#          per-duration utterance lists data/lre07/{3,10,30}sec.
# Writes:  posteriors and per-duration ivector lists in exp/ivectors_lre07,
#          target/nontarget files in local/lre07_results.
# Prints:  a three-row table (header, ER, C_avg) on stdout.

. cmd.sh
. path.sh
set -e

if [ $# -lt 1 ]; then
  echo "Usage: $0 <logistic-regression-model>" >&2
  exit 1
fi

model=$1
ivecdir=exp/ivectors_lre07
lre07dir=local/lre07_results
mkdir -p "$lre07dir"

# Prints one numeric table cell.
#   $1 - human-readable name of the quantity (used only in the error message)
#   $2 - the value to print
# An empty value means the upstream pipeline produced nothing; without this
# check printf would silently print 0.00, which looks like a real score.
print_cell() {
  if [ -z "$2" ]; then
    echo >&2
    echo "$0: could not compute $1" >&2
    exit 1
  fi
  printf '% 7.2f' "$2"
}

# Compute the posterior probabilities for all durations (3s, 10s, and 30s),
# as well as the target and nontarget files.
test_ivectors="ark:ivector-normalize-length scp:$ivecdir/ivector.scp ark:- |"
logistic-regression-eval "$model" "$test_ivectors" \
  "ark,t:$ivecdir/posteriors"

# lre07_targets.pl may write per-utterance diagnostics to stdout; discard them.
local/lre07_targets.pl "$ivecdir/posteriors" data/lre07/utt2lang \
  exp/ivectors_train/languages.txt "$lre07dir/targets" \
  "$lre07dir/nontargets" >/dev/null

# Create the score (e.g., targets.scr) file.
local/score_lre07.v01d.pl -t "$lre07dir/targets" -n "$lre07dir/nontargets"

# Compute the posterior probabilities for each duration, as well as
# the target and nontarget files.
for dur in 3 10 30; do
  utils/filter_scp.pl -f 0 "data/lre07/${dur}sec" \
    "$ivecdir/ivector.scp" > "$ivecdir/ivector_${dur}sec.scp"
  test_ivectors="ark:ivector-normalize-length scp:$ivecdir/ivector_${dur}sec.scp ark:- |"

  logistic-regression-eval "$model" "$test_ivectors" \
    "ark,t:$ivecdir/posteriors_${dur}sec"

  local/lre07_targets.pl "$ivecdir/posteriors_${dur}sec" \
    <(utils/filter_scp.pl -f 0 "data/lre07/${dur}sec" data/lre07/utt2lang) \
    exp/ivectors_train/languages.txt \
    "$lre07dir/targets_${dur}sec" "$lre07dir/nontargets_${dur}sec" >/dev/null
  local/score_lre07.v01d.pl -t "$lre07dir/targets_${dur}sec" -n \
    "$lre07dir/nontargets_${dur}sec" >/dev/null
done

# Header row. Plain %Ns is used for strings: the space flag is only defined
# for signed numeric conversions.
printf '%15s' 'Duration (sec):'
for dur in avg 3 10 30; do
  printf '%7s' "$dur"
done
echo

printf '%15s' 'ER (%):'

# Get the overall classification and then individual duration error rates.
# NOTE: exp/ivectors_lre07/output is not created by this script; it must
# already exist when the script is run.
er=$(compute-wer --text ark:<(lid/remove_dialect.pl data/lre07/utt2lang) \
  "ark:$ivecdir/output" 2>/dev/null | grep "WER" | awk '{print $2 }')
print_cell "the overall error rate" "$er"

for dur in 3 10 30; do
  er=$(compute-wer --text ark:<(utils/filter_scp.pl -f 0 \
      "data/lre07/${dur}sec" data/lre07/utt2lang | lid/remove_dialect.pl -) \
    ark:<(utils/filter_scp.pl -f 0 "data/lre07/${dur}sec" \
      "$ivecdir/output") \
    2>/dev/null | grep "WER" | awk '{print $2 }')
  print_cell "the ${dur}s error rate" "$er"
done
echo

printf '%15s' 'C_avg (%):'

# Get the overall C_avg and then C_avgs for the individual durations.
# The value is taken from the 4th field of the last line of each .scr file
# and scaled to a percentage.
cavg=$(tail -n 1 "$lre07dir/targets.scr" \
  | awk '{print 100*$4 }')
print_cell "the overall C_avg" "$cavg"

for dur in 3 10 30; do
  cavg=$(tail -n 1 "$lre07dir/targets_${dur}sec.scr" \
    | awk '{print 100.0*$4 }')
  print_cell "the ${dur}s C_avg" "$cavg"
done
echo
# Duration (sec):    avg      3     10     30
#         ER (%):  33.04  53.21  29.55  16.37
#      C_avg (%):  17.65  29.53  15.64   7.79
|
|
@ -0,0 +1,109 @@
|
|||
#! /usr/bin/perl
#
# Copyright 2014 David Snyder
# Apache 2.0.
#
# Creates the target and nontarget files used by score_lre07.v01d.pl for
# NIST LRE 2007 General Language Recognition closed-set evaluation.
# See http://www.itl.nist.gov/iad/mig//tests/lre/2007/LRE07EvalPlan-v8b.pdf
# for more details on the evaluation.
#
# Inputs:
#   <path-to-posteriors>    one line per utterance: the utterance id followed
#                           by one score per language; square brackets are
#                           stripped.  Scores are passed through exp() before
#                           being written, i.e. they are treated as
#                           log-probabilities.
#   <path-to-utt2lang>      lines of "<utt-id> <language>"; the language may
#                           carry a ".dialect" suffix.
#   <path-to-languages.txt> lines of "<language> <index>"; index k names the
#                           language of the (k+1)-th score on a posterior line.
# Outputs (one trial per line):
#   general_lr <language> closed_set <utt-id> <t|f> <probability> <actual-lang>
# where the decision is "t" when the trial language is the highest-scoring one.

use strict;

if (@ARGV != 5) {
  print STDERR "Usage: $0 <path-to-posteriors> <path-to-utt2lang> \
  <path-to-languages.txt> <path-to-targets-output> \
  <path-to-nontargets-output>\n";
  exit(1);
}

my ($posts, $utt2lang, $languages, $targets, $nontargets) = @ARGV;
my %idx_to_lang = ();
my %utt_to_lang = ();

# Only trials whose language matches this list are written out.
my $eval_langs = qr/(arabic|bengali|farsi|german|japanese|korean|russian|tamil|thai|vietnamese|chinese|english|hindustani|spanish)/i;

open(LANG2IDX, "<", $languages) || die "Cannot open $languages file";
while (<LANG2IDX>) {
  chomp;
  my ($lang, $idx) = split(" ", $_);
  next unless defined $idx;  # blank or malformed line
  $idx_to_lang{$idx} = $lang;
}
close(LANG2IDX) || die;

open(UTT2LANG, "<", $utt2lang) || die "Cannot open $utt2lang file";
while (<UTT2LANG>) {
  chomp;
  my ($utt, $lang) = split(" ", $_);
  next unless defined $utt;  # blank line
  $utt_to_lang{$utt} = $lang;
}
close(UTT2LANG) || die;

open(POSTS, "<", $posts) || die "Cannot open $posts file";
open(TARGETS, ">", $targets) || die "Cannot open $targets file";
open(NONTARGETS, ">", $nontargets) || die "Cannot open $nontargets file";
while (my $line = <POSTS>) {
  chomp($line);
  $line =~ s/[\[\]]//g;
  my @toks = split(" ", $line);
  next unless @toks;  # blank line: nothing to score

  my $utt = $toks[0];
  my $actual_lang = $utt_to_lang{$utt};
  if (!defined $actual_lang) {
    # Without a reference language no meaningful trial can be produced.
    print STDERR "$0: no language for utterance $utt in $utt2lang, skipping\n";
    next;
  }

  # Find the highest-scoring language and the probability assigned to the
  # true language.  The true language is matched by substring so that a
  # dialect label such as "<language>.<dialect>" matches its base language.
  my $max_lang = "zzz";
  my $max_log_prob = -9**9**9; # -inf
  my $target_prob = 0;
  for (my $i = 1; $i < @toks; $i++) {
    my $cand_lang = $idx_to_lang{$i-1};
    if ($max_log_prob < $toks[$i]) {
      $max_log_prob = $toks[$i];
      $max_lang = $cand_lang;
    }
    if (index($actual_lang, $cand_lang) != -1) {
      $target_prob = exp($toks[$i]);
    }
  }

  # Strip the dialect suffix, if any, to get the language name to report.
  my $lang;
  if (index($actual_lang, ".") != -1) {
    my @lang_parts = split("[.]", $actual_lang);
    $lang = $lang_parts[0];
  } else {
    $lang = $actual_lang;
  }

  # Handle target
  if ($lang =~ $eval_langs) {
    my $decision = (index($actual_lang, $max_lang) != -1) ? "t" : "f";
    print TARGETS "general_lr $lang closed_set $utt $decision $target_prob "
      ."$actual_lang\n";
  }

  # Handle nontarget
  for (my $i = 1; $i < @toks; $i++) {
    my $nontarget_lang = $idx_to_lang{$i-1};
    # The true language is a target trial, not a nontarget one.
    next if (index($actual_lang, $nontarget_lang) != -1);
    next unless ($nontarget_lang =~ $eval_langs);

    my $prob = exp($toks[$i]);
    # "t" if the nontarget lang is most probable
    my $decision = (index($max_lang, $nontarget_lang) != -1) ? "t" : "f";
    print NONTARGETS "general_lr $nontarget_lang closed_set $utt $decision "
      ."$prob $actual_lang\n";
  }
}
close(POSTS) || die;
close(TARGETS) || die;
close(NONTARGETS) || die;
|
|
@ -36,14 +36,19 @@ open(WAV, ">$dir/wav.scp") || die "Failed opening output file $out_dir/wav.scp";
|
|||
open(UTT2SPK, ">$dir/utt2spk") || die "Failed opening output file $dir/utt2spk";
|
||||
open(SPK2UTT, ">$dir/spk2utt") || die "Failed opening output file $dir/spk2utt";
|
||||
open(UTT2LANG, ">$dir/utt2lang") || die "Failed opening output file $dir/utt2lang";
|
||||
open(DUR3, ">$dir/3sec") || die "Failed opening output file $dir/3sec";
|
||||
open(DUR10, ">$dir/10sec") || die "Failed opening output file $dir/10sec";
|
||||
open(DUR30, ">$dir/30sec") || die "Failed opening output file $dir/30sec";
|
||||
|
||||
my $key_str = `wget -qO- "http://www.itl.nist.gov/iad/mig/tests/lang/2007/lid07key_v5.txt"`;
|
||||
@key_lines = split("\n",$key_str);
|
||||
%utt2lang = ();
|
||||
%utt2dur = ();
|
||||
foreach (@key_lines) {
|
||||
@words = split(' ', $_);
|
||||
if (index($words[0], "#") == -1) {
|
||||
$utt2lang{$words[0]} = $words[1];
|
||||
$utt2dur{$words[0]} = $words[5];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -55,11 +60,23 @@ foreach (sort keys(%wav)) {
|
|||
print UTT2SPK "$uttId $uttId\n";
|
||||
print SPK2UTT "$uttId $uttId\n";
|
||||
print UTT2LANG "$uttId $utt2lang{$uttId}\n";
|
||||
if ($utt2dur{$uttId} == 3) {
|
||||
print DUR3 "$uttId\n";
|
||||
} elsif ($utt2dur{$uttId} == 10) {
|
||||
print DUR10 "$uttId\n";
|
||||
} elsif ($utt2dur{$uttId} == 30) {
|
||||
print DUR30 "$uttId\n";
|
||||
} else {
|
||||
die "Invalid nominal duration in test segment";
|
||||
}
|
||||
}
|
||||
close(WAV) || die;
|
||||
close(UTT2SPK) || die;
|
||||
close(SPK2UTT) || die;
|
||||
close(UTT2LANG) || die;
|
||||
close(DUR3) || die;
|
||||
close(DUR10) || die;
|
||||
close(DUR30) || die;
|
||||
close(WAVLIST) || die;
|
||||
system("rm -r $dir/tmp");
|
||||
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -31,7 +31,8 @@ classes="ark:lid/remove_dialect.pl data/train/utt2lang \
|
|||
# Create priors to rebalance the model. The following script rebalances
|
||||
# the languages as count(lang_test) / (count(lang_test) + count(lang_train)).
|
||||
lid/balance_priors_to_test.pl \
|
||||
<(lid/remove_dialect.pl data/train/utt2lang) \
|
||||
<(lid/remove_dialect.pl <(utils/filter_scp.pl -f 0 \
|
||||
exp/ivectors_train/ivector.scp data/train/utt2lang)) \
|
||||
<(lid/remove_dialect.pl data/lre07/utt2lang) \
|
||||
exp/ivectors_train/languages.txt \
|
||||
exp/ivectors_train/priors.vec
|
||||
|
@ -66,8 +67,8 @@ cat exp/ivectors_train/posteriors | \
|
|||
compute-wer --mode=present --text ark:<(lid/remove_dialect.pl data/train/utt2lang) \
|
||||
ark:exp/ivectors_train/output
|
||||
|
||||
# %WER 4.19 [ 3000 / 71668, 0 ins, 0 del, 3000 sub ] [PARTIAL]
|
||||
# %SER 4.19 [ 3000 / 71668 ]
|
||||
# %WER 4.73 [ 3389 / 71668, 0 ins, 0 del, 3389 sub ] [PARTIAL]
|
||||
# %SER 4.73 [ 3389 / 71668 ]
|
||||
# Scored 71668 sentences, 16 not present in hyp.
|
||||
logistic-regression-eval $model_rebalanced \
|
||||
'ark:ivector-normalize-length scp:exp/ivectors_lre07/ivector.scp ark:- |' ark,t:- | \
|
||||
|
@ -78,7 +79,13 @@ logistic-regression-eval $model_rebalanced \
|
|||
|
||||
compute-wer --text ark:<(lid/remove_dialect.pl data/lre07/utt2lang) \
|
||||
ark:exp/ivectors_lre07/output
|
||||
# %WER 32.58 [ 2452 / 7527, 0 ins, 0 del, 2452 sub ]
|
||||
# %SER 32.58 [ 2452 / 7527 ]
|
||||
|
||||
# %WER 33.04 [ 2487 / 7527, 0 ins, 0 del, 2487 sub ]
|
||||
# %SER 33.04 [ 2487 / 7527 ]
|
||||
# Scored 7527 sentences, 0 not present in hyp.
|
||||
|
||||
# General LR closed-set eval.
|
||||
local/lre07_logistic_regression_eval.sh $model_rebalanced
|
||||
# Duration (sec): avg 3 10 30
|
||||
# ER (%): 33.04 53.21 29.55 16.37
|
||||
# C_avg (%): 17.65 29.53 15.64 7.79
|
||||
|
|
Загрузка…
Ссылка в новой задаче