Bug 562908. Add genignorable.pl script to regenerate ignorable.x-ccmap. r=smontagu

This commit is contained in:
Masatoshi Kimura 2010-05-24 09:36:49 +12:00
Родитель 080feb5d07
Коммит 357e1249e5
2 изменённых файлов: 67 добавлений и 1 удалений

Просмотреть файл

@ -228,7 +228,7 @@ sub read_input
my ($u, $comment) = split /\s+/, $1, 2;
$u =~ s/,//g;
$u =~ tr/A-Z/a-z/;
next if /^0x.*[^0-9a-f]+.*/;
next if $u =~ /^0x.*[^0-9a-f]+.*/;
my $usv = oct $u;
if ( 0xd800 <= $usv && $usv <= 0xdfff || # surrogate code points

Просмотреть файл

@ -0,0 +1,66 @@
#!/usr/bin/perl
# Lists every code point that DerivedCoreProperties.txt marks as
# Default_Ignorable_Code_Point.
#
# STDOUT: one line per code point, "0xXXXX // NAME", where NAME is looked up
#         in UnicodeData-Latest.txt (the comment is omitted when no name is
#         known for that code point).
# STDERR: a single character class, "[...]", built from the same ranges with
#         \uXXXX escapes by make_unicode_range().
#
# Both input files are read from the current directory.
use strict;
use warnings;

# Code point (as a number) => character name, taken from the first two
# ';'-separated columns of UnicodeData-Latest.txt.
my %names;
open my $unicode_data, '<', 'UnicodeData-Latest.txt'
    or die "Cannot open UnicodeData-Latest.txt: $!";
while (my $line = <$unicode_data>) {
    my ($code, $name) = split /;/, $line;
    next unless defined $name;    # skip lines without a name column
    $names{hex($code)} = $name;
}
close $unicode_data;

my $re = '[';
my ($prev_start, $prev_end);      # the run of adjacent ranges being merged

open my $properties, '<', 'DerivedCoreProperties.txt'
    or die "Cannot open DerivedCoreProperties.txt: $!";
while (my $line = <$properties>) {
    next unless $line =~ /Default_Ignorable_Code_Point/;
    next unless $line =~ /^([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?/;

    # A line holds either a single code point or a START..END range.
    my $start = hex($1);
    my $end   = defined $2 ? hex($2) : $start;

    for my $c ($start .. $end) {
        printf "0x%04X", $c;
        # Plain print: the name is data and must not be used as a format.
        print " // $names{$c}" if $names{$c};
        print "\n";
    }

    # Flush the pending run only when this range does not directly continue
    # it; adjacent ranges are merged into a single run.
    if (!defined $prev_end || $start > $prev_end + 1) {
        $re .= make_unicode_range($prev_start, $prev_end)
            if defined $prev_start;
        $prev_start = $start;
    }
    $prev_end = $end;
}
close $properties;

# Flush the last pending run, if the file contained any matching line.
$re .= make_unicode_range($prev_start, $prev_end) if defined $prev_start;
$re .= ']';
print STDERR $re;
# Returns the text that represents the inclusive code point range
# $start..$end inside a character class written with \uXXXX escapes.
#
# The caller is expected to have an open '[' before the returned text and to
# append the closing ']' itself.
#
#   * BMP ranges (<= 0xffff) yield "\uXXXX" or "\uXXXX-\uYYYY".
#   * Supplementary ranges are expressed as UTF-16 surrogate pairs.  Since a
#     pair cannot live inside one class, each piece first closes the current
#     class and starts an alternative: "]|\uHHHH[\uLLLL-\uLLLL".  The final
#     class is left open for the caller to close.
#   * A range that starts in the BMP and ends above it is split at the
#     boundary and both halves are concatenated.
sub make_unicode_range
{
    my ($start, $end) = @_;

    # Split ranges that straddle the BMP boundary; neither branch below can
    # express them in one piece.
    if ($start <= 0xffff && $end > 0xffff) {
        return make_unicode_range($start, 0xffff)
             . make_unicode_range(0x10000, $end);
    }

    if ($start > 0xffff) {
        my $first_hs = (($start - 0x10000) >> 10)   | 0xd800;
        my $first_ls = (($start - 0x10000) & 0x3ff) | 0xdc00;
        my $last_hs  = (($end   - 0x10000) >> 10)   | 0xd800;
        my $last_ls  = (($end   - 0x10000) & 0x3ff) | 0xdc00;

        # Whole range shares one high surrogate: a single low-surrogate span.
        if ($first_hs == $last_hs) {
            return sprintf("]|\\u%04x[\\u%04x-\\u%04x",
                           $first_hs, $first_ls, $last_ls);
        }

        # Otherwise the range is: an optional partial head block, zero or
        # more complete 1024-code-point blocks, and an optional partial tail
        # block.  $full_first..$full_last tracks the complete blocks only, so
        # the original surrogates stay available for the head and the tail.
        my ($full_first, $full_last) = ($first_hs, $last_hs);

        my $head = '';
        if ($first_ls > 0xdc00) {
            $head = sprintf("]|\\u%04x[\\u%04x-\\udfff", $first_hs, $first_ls);
            $full_first++;
        }

        my $tail = '';
        if ($last_ls < 0xdfff) {
            $tail = sprintf("]|\\u%04x[\\udc00-\\u%04x", $last_hs, $last_ls);
            $full_last--;
        }

        my $middle = '';
        if ($full_first <= $full_last) {
            $middle = sprintf("]|[\\u%04x-\\u%04x][\\udc00-\\udfff",
                              $full_first, $full_last);
        }

        return $head . $middle . $tail;
    }

    return sprintf("\\u%04x", $start) if $start == $end;
    return sprintf("\\u%04x-\\u%04x", $start, $end);
}