Data-structure change in split_scp.pl to prevent it becoming very slow when speakers have many utterances.

This commit is contained in:
Daniel Povey 2016-05-13 21:13:25 -04:00
Родитель e1d56526c2
Коммит b111a8baf8
1 изменённых файлов: 6 добавлений и 6 удалений

Просмотреть файл

@@ -100,10 +100,10 @@ if ($utt2spk_file ne "") { # We have the --utt2spk option...
 if(!defined $spk_count{$s}) {
 push @spkrs, $s;
 $spk_count{$s} = 0;
-$spk_data{$s} = "";
+$spk_data{$s} = []; # ref to new empty array.
 }
 $spk_count{$s}++;
-$spk_data{$s} = $spk_data{$s} . $_;
+push @{$spk_data{$s}}, $_;
 }
 # Now split as equally as possible ..
 # First allocate spks to files by allocating an approximately
@@ -182,7 +182,7 @@ if ($utt2spk_file ne "") { # We have the --utt2spk option...
 $error = 1;
 } else {
 foreach $spk ( @{$scparray[$scpidx]} ) {
-print F $spk_data{$spk};
+print F @{$spk_data{$spk}};
 $count += $spk_count{$spk};
 }
 if($count != $scpcount[$scpidx]) { die "Count mismatch [code error]"; }