зеркало из https://github.com/mozilla/kaldi.git
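Data-structure change in split_scp.pl (per-speaker lines are now stored in an array instead of being appended to one growing string) to prevent it from becoming very slow when speakers have many utterances.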
This commit is contained in:
Родитель
e1d56526c2
Коммит
b111a8baf8
|
@ -72,7 +72,7 @@ if ($num_jobs == 0) { # without -j option
|
||||||
@OUTPUTS = @ARGV;
|
@OUTPUTS = @ARGV;
|
||||||
} else {
|
} else {
|
||||||
for ($j = 0; $j < $num_jobs; $j++) {
|
for ($j = 0; $j < $num_jobs; $j++) {
|
||||||
if ($j == $job_id) {
|
if ($j == $job_id) {
|
||||||
if (@ARGV > 0) { push @OUTPUTS, $ARGV[0]; }
|
if (@ARGV > 0) { push @OUTPUTS, $ARGV[0]; }
|
||||||
else { push @OUTPUTS, "-"; }
|
else { push @OUTPUTS, "-"; }
|
||||||
} else {
|
} else {
|
||||||
|
@ -98,12 +98,12 @@ if ($utt2spk_file ne "") { # We have the --utt2spk option...
|
||||||
$s = $utt2spk{$u};
|
$s = $utt2spk{$u};
|
||||||
if(!defined $s) { die "No such utterance $u in utt2spk file $utt2spk_file"; }
|
if(!defined $s) { die "No such utterance $u in utt2spk file $utt2spk_file"; }
|
||||||
if(!defined $spk_count{$s}) {
|
if(!defined $spk_count{$s}) {
|
||||||
push @spkrs, $s;
|
push @spkrs, $s;
|
||||||
$spk_count{$s} = 0;
|
$spk_count{$s} = 0;
|
||||||
$spk_data{$s} = "";
|
$spk_data{$s} = []; # ref to new empty array.
|
||||||
}
|
}
|
||||||
$spk_count{$s}++;
|
$spk_count{$s}++;
|
||||||
$spk_data{$s} = $spk_data{$s} . $_;
|
push @{$spk_data{$s}}, $_;
|
||||||
}
|
}
|
||||||
# Now split as equally as possible ..
|
# Now split as equally as possible ..
|
||||||
# First allocate spks to files by allocating an approximately
|
# First allocate spks to files by allocating an approximately
|
||||||
|
@ -182,7 +182,7 @@ if ($utt2spk_file ne "") { # We have the --utt2spk option...
|
||||||
$error = 1;
|
$error = 1;
|
||||||
} else {
|
} else {
|
||||||
foreach $spk ( @{$scparray[$scpidx]} ) {
|
foreach $spk ( @{$scparray[$scpidx]} ) {
|
||||||
print F $spk_data{$spk};
|
print F @{$spk_data{$spk}};
|
||||||
$count += $spk_count{$spk};
|
$count += $spk_count{$spk};
|
||||||
}
|
}
|
||||||
if($count != $scpcount[$scpidx]) { die "Count mismatch [code error]"; }
|
if($count != $scpcount[$scpidx]) { die "Count mismatch [code error]"; }
|
||||||
|
@ -190,7 +190,7 @@ if ($utt2spk_file ne "") { # We have the --utt2spk option...
|
||||||
close(F);
|
close(F);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
# This block is the "normal" case where there is no --utt2spk
|
# This block is the "normal" case where there is no --utt2spk
|
||||||
# option and we just break into equal size chunks.
|
# option and we just break into equal size chunks.
|
||||||
|
|
||||||
open(I, "<$inscp") || die "Opening input scp file $inscp";
|
open(I, "<$inscp") || die "Opening input scp file $inscp";
|
||||||
|
|
Загрузка…
Ссылка в новой задаче