зеркало из https://github.com/mozilla/gecko-dev.git
Bug 917436 - Part 1: Record Unicode version in files generated from make_unicode.py. r=arai
--HG-- extra : rebase_source : e658e60d2d248456c3b748a476713125f0324cae
This commit is contained in:
Родитель
38fd958f01
Коммит
2a6d585066
|
@ -1,4 +1,5 @@
|
|||
/* Generated by make_unicode.py DO NOT MODIFY */
|
||||
/* Unicode version: 6.2.0 */
|
||||
|
||||
/*
|
||||
* Any copyright is dedicated to the Public Domain.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
/* Generated by make_unicode.py DO NOT MODIFY */
|
||||
/* Unicode version: 6.2.0 */
|
||||
|
||||
/*
|
||||
* Any copyright is dedicated to the Public Domain.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
/* Generated by make_unicode.py DO NOT MODIFY */
|
||||
/* Unicode version: 8.0.0 */
|
||||
|
||||
/*
|
||||
* Any copyright is dedicated to the Public Domain.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
/* Generated by make_unicode.py DO NOT MODIFY */
|
||||
/* Unicode version: 6.2.0 */
|
||||
|
||||
/*
|
||||
* Any copyright is dedicated to the Public Domain.
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,4 +1,6 @@
|
|||
/* Generated by make_unicode.py DO NOT MODIFY */
|
||||
/* Unicode version: 6.2.0 */
|
||||
/* Casefold Unicode version: 8.0.0 */
|
||||
|
||||
/*
|
||||
* Any copyright is dedicated to the Public Domain.
|
||||
|
@ -1523,4 +1525,3 @@ const uint8_t unicode::folding_index2[] = {
|
|||
0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/* Generated by make_unicode.py DO NOT MODIFY */
|
||||
/* Unicode version: 6.2.0 */
|
||||
/* Casefold Unicode version: 8.0.0 */
|
||||
|
||||
#ifndef vm_UnicodeNonBMP_h
|
||||
#define vm_UnicodeNonBMP_h
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Based upon makeunicodedata.py
|
||||
# (http://hg.python.org/cpython/file/c8192197d23d/Tools/unicode/makeunicodedata.py)
|
||||
|
@ -20,9 +21,14 @@
|
|||
|
||||
from __future__ import print_function
|
||||
import csv
|
||||
import io
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
from contextlib import closing
|
||||
|
||||
# ECMAScript 5 $ 7.2
|
||||
# ECMAScript 2016
|
||||
# §11.2 White Space
|
||||
whitespace = [
|
||||
# python doesn't support using control character names :(
|
||||
0x9, # CHARACTER TABULATION
|
||||
|
@ -33,7 +39,7 @@ whitespace = [
|
|||
ord(u'\N{ZERO WIDTH NO-BREAK SPACE}'), # also BOM
|
||||
]
|
||||
|
||||
# $ 7.3
|
||||
# §11.3 Line Terminators
|
||||
line_terminator = [
|
||||
0xa, # LINE FEED
|
||||
0xd, # CARRIAGE RETURN
|
||||
|
@ -41,15 +47,17 @@ line_terminator = [
|
|||
ord(u'\N{PARAGRAPH SEPARATOR}'),
|
||||
]
|
||||
|
||||
# These are also part of IdentifierPart $7.6
|
||||
ZWNJ = ord(u'\N{ZERO WIDTH NON-JOINER}')
|
||||
ZWJ = ord(u'\N{ZERO WIDTH JOINER}')
|
||||
# These are also part of IdentifierPart §11.6 Names and Keywords
|
||||
compatibility_identifier_part = [
|
||||
ord(u'\N{ZERO WIDTH NON-JOINER}'),
|
||||
ord(u'\N{ZERO WIDTH JOINER}'),
|
||||
]
|
||||
|
||||
FLAG_SPACE = 1 << 0
|
||||
FLAG_LETTER = 1 << 1
|
||||
FLAG_IDENTIFIER_PART = 1 << 2
|
||||
|
||||
MAX = 0xffff
|
||||
MAX_BMP = 0xffff
|
||||
|
||||
public_domain = """
|
||||
/*
|
||||
|
@ -58,7 +66,27 @@ public_domain = """
|
|||
*/
|
||||
"""
|
||||
|
||||
def read_unicode_data(unicode_file):
|
||||
mpl_license = """\
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
* vim: set ts=8 sts=4 et sw=4 tw=99:
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
"""
|
||||
|
||||
warning_message = """\
|
||||
/* Generated by make_unicode.py DO NOT MODIFY */
|
||||
"""
|
||||
|
||||
unicode_version_message = """\
|
||||
/* Unicode version: {0} */
|
||||
"""
|
||||
|
||||
casefold_version_message = """\
|
||||
/* Casefold Unicode version: {0} */
|
||||
"""
|
||||
|
||||
def read_unicode_data(unicode_data):
|
||||
"""
|
||||
If you want to understand how this wonderful file format works checkout
|
||||
Unicode Standard Annex #44 - Unicode Character Database
|
||||
|
@ -137,34 +165,22 @@ def make_non_bmp_convert_macro(out_file, name, convert_map):
|
|||
out_file.write(' \\\n'.join(lines))
|
||||
out_file.write('\n')
|
||||
|
||||
def generate_unicode_stuff(unicode_data, case_folding,
|
||||
data_file, non_bmp_file,
|
||||
test_mapping, test_non_bmp_mapping,
|
||||
test_space, test_icase):
|
||||
def process_unicode_data(unicode_data):
|
||||
dummy = (0, 0, 0)
|
||||
table = [dummy]
|
||||
cache = {dummy: 0}
|
||||
index = [0] * (MAX + 1)
|
||||
index = [0] * (MAX_BMP + 1)
|
||||
same_upper_map = {}
|
||||
same_upper_dummy = (0, 0, 0)
|
||||
same_upper_table = [same_upper_dummy]
|
||||
same_upper_cache = {same_upper_dummy: 0}
|
||||
same_upper_index = [0] * (MAX + 1)
|
||||
folding_map = {}
|
||||
rev_folding_map = {}
|
||||
folding_dummy = (0, 0, 0, 0)
|
||||
folding_table = [folding_dummy]
|
||||
folding_cache = {folding_dummy: 0}
|
||||
folding_index = [0] * (MAX + 1)
|
||||
same_upper_index = [0] * (MAX_BMP + 1)
|
||||
|
||||
test_table = {}
|
||||
test_space_table = []
|
||||
folding_tests = []
|
||||
folding_codes = set()
|
||||
|
||||
non_bmp_lower_map = {}
|
||||
non_bmp_upper_map = {}
|
||||
non_bmp_folding_map = {}
|
||||
non_bmp_rev_folding_map = {}
|
||||
|
||||
for row in read_unicode_data(unicode_data):
|
||||
code = row[0]
|
||||
|
@ -190,7 +206,7 @@ def generate_unicode_stuff(unicode_data, case_folding,
|
|||
else:
|
||||
lower = code
|
||||
|
||||
if code > MAX:
|
||||
if code > MAX_BMP:
|
||||
if code != lower:
|
||||
non_bmp_lower_map[code] = lower
|
||||
if code != upper:
|
||||
|
@ -201,9 +217,13 @@ def generate_unicode_stuff(unicode_data, case_folding,
|
|||
if category == 'Zs' or code in whitespace or code in line_terminator:
|
||||
flags |= FLAG_SPACE
|
||||
test_space_table.append(code)
|
||||
if category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']: # $ 7.6 (UnicodeLetter)
|
||||
|
||||
# §7.6 (UnicodeLetter)
|
||||
if category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']:
|
||||
flags |= FLAG_LETTER
|
||||
if category in ['Mn', 'Mc', 'Nd', 'Pc'] or code == ZWNJ or code == ZWJ: # $ 7.6 (IdentifierPart)
|
||||
|
||||
# §7.6 (IdentifierPart)
|
||||
if category in ['Mn', 'Mc', 'Nd', 'Pc'] or code in compatibility_identifier_part:
|
||||
flags |= FLAG_IDENTIFIER_PART
|
||||
|
||||
test_table[code] = (upper, lower, name, alias)
|
||||
|
@ -226,7 +246,7 @@ def generate_unicode_stuff(unicode_data, case_folding,
|
|||
table.append(item)
|
||||
index[code] = i
|
||||
|
||||
for code in range(0, MAX + 1):
|
||||
for code in range(0, MAX_BMP + 1):
|
||||
entry = test_table.get(code)
|
||||
|
||||
if not entry:
|
||||
|
@ -256,12 +276,33 @@ def generate_unicode_stuff(unicode_data, case_folding,
|
|||
same_upper_table.append(item)
|
||||
same_upper_index[code] = i
|
||||
|
||||
return (
|
||||
table, index,
|
||||
same_upper_table, same_upper_index,
|
||||
non_bmp_lower_map, non_bmp_upper_map,
|
||||
test_table, test_space_table,
|
||||
)
|
||||
|
||||
def process_case_folding(case_folding):
|
||||
folding_map = {}
|
||||
rev_folding_map = {}
|
||||
folding_dummy = (0, 0, 0, 0)
|
||||
folding_table = [folding_dummy]
|
||||
folding_cache = {folding_dummy: 0}
|
||||
folding_index = [0] * (MAX_BMP + 1)
|
||||
|
||||
folding_tests = []
|
||||
folding_codes = set()
|
||||
|
||||
non_bmp_folding_map = {}
|
||||
non_bmp_rev_folding_map = {}
|
||||
|
||||
for row in read_case_folding(case_folding):
|
||||
code = row[0]
|
||||
mapping = row[2]
|
||||
folding_map[code] = mapping
|
||||
|
||||
if code > MAX:
|
||||
if code > MAX_BMP:
|
||||
non_bmp_folding_map[code] = mapping
|
||||
non_bmp_rev_folding_map[mapping] = code
|
||||
|
||||
|
@ -294,7 +335,7 @@ def generate_unicode_stuff(unicode_data, case_folding,
|
|||
item.append(folding)
|
||||
folding_tests.append(item + rev_folding)
|
||||
|
||||
if code > MAX:
|
||||
if code > MAX_BMP:
|
||||
continue
|
||||
|
||||
folding_d = folding - code
|
||||
|
@ -317,46 +358,58 @@ def generate_unicode_stuff(unicode_data, case_folding,
|
|||
folding_cache[item] = i = len(folding_table)
|
||||
folding_table.append(item)
|
||||
folding_index[code] = i
|
||||
return (
|
||||
folding_table, folding_index,
|
||||
non_bmp_folding_map, non_bmp_rev_folding_map,
|
||||
folding_tests
|
||||
)
|
||||
|
||||
non_bmp_file.write("""/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
* vim: set ts=8 sts=4 et sw=4 tw=99:
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/* Generated by make_unicode.py DO NOT MODIFY */
|
||||
|
||||
def make_non_bmp_file(version, casefold_version,
|
||||
non_bmp_lower_map, non_bmp_upper_map,
|
||||
non_bmp_folding_map, non_bmp_rev_folding_map):
|
||||
file_name = 'UnicodeNonBMP.h';
|
||||
with io.open(file_name, mode='wb') as non_bmp_file:
|
||||
non_bmp_file.write(mpl_license)
|
||||
non_bmp_file.write('\n')
|
||||
non_bmp_file.write(warning_message)
|
||||
non_bmp_file.write(unicode_version_message.format(version))
|
||||
non_bmp_file.write(casefold_version_message.format(casefold_version))
|
||||
non_bmp_file.write("""
|
||||
#ifndef vm_UnicodeNonBMP_h
|
||||
#define vm_UnicodeNonBMP_h
|
||||
|
||||
""")
|
||||
|
||||
make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map)
|
||||
non_bmp_file.write('\n')
|
||||
make_non_bmp_convert_macro(non_bmp_file, 'UPPERCASE', non_bmp_upper_map)
|
||||
non_bmp_file.write('\n')
|
||||
make_non_bmp_convert_macro(non_bmp_file, 'CASE_FOLDING', non_bmp_folding_map)
|
||||
non_bmp_file.write('\n')
|
||||
make_non_bmp_convert_macro(non_bmp_file, 'REV_CASE_FOLDING', non_bmp_rev_folding_map)
|
||||
make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map)
|
||||
non_bmp_file.write('\n')
|
||||
make_non_bmp_convert_macro(non_bmp_file, 'UPPERCASE', non_bmp_upper_map)
|
||||
non_bmp_file.write('\n')
|
||||
make_non_bmp_convert_macro(non_bmp_file, 'CASE_FOLDING', non_bmp_folding_map)
|
||||
non_bmp_file.write('\n')
|
||||
make_non_bmp_convert_macro(non_bmp_file, 'REV_CASE_FOLDING', non_bmp_rev_folding_map)
|
||||
|
||||
non_bmp_file.write("""
|
||||
non_bmp_file.write("""
|
||||
#endif /* vm_UnicodeNonBMP_h */
|
||||
""")
|
||||
|
||||
test_mapping.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
|
||||
test_mapping.write(public_domain)
|
||||
test_mapping.write('var mapping = [\n')
|
||||
for code in range(0, MAX + 1):
|
||||
entry = test_table.get(code)
|
||||
def make_bmp_mapping_test(version, test_table):
|
||||
file_name = '../tests/ecma_5/String/string-upper-lower-mapping.js'
|
||||
with io.open(file_name, mode='wb') as test_mapping:
|
||||
test_mapping.write(warning_message)
|
||||
test_mapping.write(unicode_version_message.format(version))
|
||||
test_mapping.write(public_domain)
|
||||
test_mapping.write('var mapping = [\n')
|
||||
for code in range(0, MAX_BMP + 1):
|
||||
entry = test_table.get(code)
|
||||
|
||||
if entry:
|
||||
(upper, lower, name, alias) = entry
|
||||
test_mapping.write(' [' + hex(upper) + ', ' + hex(lower) + '], /* ' +
|
||||
name + (' (' + alias + ')' if alias else '') + ' */\n')
|
||||
else:
|
||||
test_mapping.write(' [' + hex(code) + ', ' + hex(code) + '],\n')
|
||||
test_mapping.write('];')
|
||||
test_mapping.write("""
|
||||
if entry:
|
||||
(upper, lower, name, alias) = entry
|
||||
test_mapping.write(' [' + hex(upper) + ', ' + hex(lower) + '], /* ' +
|
||||
name + (' (' + alias + ')' if alias else '') + ' */\n')
|
||||
else:
|
||||
test_mapping.write(' [' + hex(code) + ', ' + hex(code) + '],\n')
|
||||
test_mapping.write('];')
|
||||
test_mapping.write("""
|
||||
assertEq(mapping.length, 0x10000);
|
||||
for (var i = 0; i <= 0xffff; i++) {
|
||||
var char = String.fromCharCode(i);
|
||||
|
@ -370,28 +423,35 @@ if (typeof reportCompare === "function")
|
|||
reportCompare(true, true);
|
||||
""")
|
||||
|
||||
test_non_bmp_mapping.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
|
||||
test_non_bmp_mapping.write(public_domain)
|
||||
for code in sorted(non_bmp_upper_map.keys()):
|
||||
test_non_bmp_mapping.write("""\
|
||||
def make_non_bmp_mapping_test(version, non_bmp_upper_map, non_bmp_lower_map):
|
||||
file_name = '../tests/ecma_6/String/string-code-point-upper-lower-mapping.js'
|
||||
with io.open(file_name, mode='wb') as test_non_bmp_mapping:
|
||||
test_non_bmp_mapping.write(warning_message)
|
||||
test_non_bmp_mapping.write(unicode_version_message.format(version))
|
||||
test_non_bmp_mapping.write(public_domain)
|
||||
for code in sorted(non_bmp_upper_map.keys()):
|
||||
test_non_bmp_mapping.write("""\
|
||||
assertEq(String.fromCodePoint(0x{:x}).toUpperCase().codePointAt(0), 0x{:x});
|
||||
""".format(code, non_bmp_upper_map[code]))
|
||||
for code in sorted(non_bmp_lower_map.keys()):
|
||||
test_non_bmp_mapping.write("""\
|
||||
for code in sorted(non_bmp_lower_map.keys()):
|
||||
test_non_bmp_mapping.write("""\
|
||||
assertEq(String.fromCodePoint(0x{:x}).toLowerCase().codePointAt(0), 0x{:x});
|
||||
""".format(code, non_bmp_lower_map[code]))
|
||||
|
||||
test_non_bmp_mapping.write("""
|
||||
test_non_bmp_mapping.write("""
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
||||
""")
|
||||
|
||||
|
||||
test_space.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
|
||||
test_space.write(public_domain)
|
||||
test_space.write('var onlySpace = String.fromCharCode(' +
|
||||
', '.join(map(lambda c: hex(c), test_space_table)) + ');\n')
|
||||
test_space.write("""
|
||||
def make_space_test(version, test_space_table):
|
||||
file_name = '../tests/ecma_5/String/string-space-trim.js'
|
||||
with io.open(file_name, mode='wb') as test_space:
|
||||
test_space.write(warning_message)
|
||||
test_space.write(unicode_version_message.format(version))
|
||||
test_space.write(public_domain)
|
||||
test_space.write('var onlySpace = String.fromCharCode(' +
|
||||
', '.join(map(lambda c: hex(c), test_space_table)) + ');\n')
|
||||
test_space.write("""
|
||||
assertEq(onlySpace.trim(), "");
|
||||
assertEq((onlySpace + 'aaaa').trim(), 'aaaa');
|
||||
assertEq(('aaaa' + onlySpace).trim(), 'aaaa');
|
||||
|
@ -401,9 +461,13 @@ if (typeof reportCompare === "function")
|
|||
reportCompare(true, true);
|
||||
""")
|
||||
|
||||
test_icase.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
|
||||
test_icase.write(public_domain)
|
||||
test_icase.write("""
|
||||
def make_icase_test(version, folding_tests):
|
||||
file_name = '../tests/ecma_6/RegExp/unicode-ignoreCase.js'
|
||||
with io.open(file_name, mode='wb') as test_icase:
|
||||
test_icase.write(warning_message)
|
||||
test_icase.write(unicode_version_message.format(version))
|
||||
test_icase.write(public_domain)
|
||||
test_icase.write("""
|
||||
var BUGNUMBER = 1135377;
|
||||
var summary = "Implement RegExp unicode flag -- ignoreCase flag.";
|
||||
|
||||
|
@ -417,26 +481,30 @@ function test(code, ...equivs) {
|
|||
assertEqArray(codeRe.exec("<" + ans + ">"), [ans]);
|
||||
}
|
||||
""")
|
||||
for args in folding_tests:
|
||||
test_icase.write('test(' + ','.join([hex(c) for c in args]) + ');\n')
|
||||
test_icase.write("""
|
||||
for args in folding_tests:
|
||||
test_icase.write('test(' + ','.join([hex(c) for c in args]) + ');\n')
|
||||
test_icase.write("""
|
||||
if (typeof reportCompare === "function")
|
||||
reportCompare(true, true);
|
||||
""")
|
||||
|
||||
def make_unicode_file(version, casefold_version,
|
||||
table, index,
|
||||
same_upper_table, same_upper_index,
|
||||
folding_table, folding_index):
|
||||
index1, index2, shift = splitbins(index)
|
||||
|
||||
# Don't forget to update CharInfo in Unicode.cpp if you need to change this
|
||||
# Don't forget to update CharInfo in Unicode.h if you need to change this
|
||||
assert shift == 5
|
||||
|
||||
same_upper_index1, same_upper_index2, same_upper_shift = splitbins(same_upper_index)
|
||||
|
||||
# Don't forget to update CharInfo in Unicode.cpp if you need to change this
|
||||
# Don't forget to update CodepointsWithSameUpperCaseInfo in Unicode.h if you need to change this
|
||||
assert same_upper_shift == 6
|
||||
|
||||
folding_index1, folding_index2, folding_shift = splitbins(folding_index)
|
||||
|
||||
# Don't forget to update CharInfo in Unicode.cpp if you need to change this
|
||||
# Don't forget to update CaseFoldInfo in Unicode.h if you need to change this
|
||||
assert folding_shift == 6
|
||||
|
||||
# verify correctness
|
||||
|
@ -512,20 +580,6 @@ if (typeof reportCompare === "function")
|
|||
* stop if you found the best shift
|
||||
*/
|
||||
"""
|
||||
data_file.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
|
||||
data_file.write(public_domain)
|
||||
data_file.write('#include "vm/Unicode.h"\n\n')
|
||||
data_file.write('using namespace js;\n')
|
||||
data_file.write('using namespace js::unicode;\n')
|
||||
data_file.write(comment)
|
||||
data_file.write('const CharacterInfo unicode::js_charinfo[] = {\n')
|
||||
for d in table:
|
||||
data_file.write(' {')
|
||||
data_file.write(', '.join((str(e) for e in d)))
|
||||
data_file.write('},\n')
|
||||
data_file.write('};\n')
|
||||
data_file.write('\n')
|
||||
|
||||
def dump(data, name, file):
|
||||
file.write('const uint8_t unicode::' + name + '[] = {\n')
|
||||
|
||||
|
@ -546,38 +600,54 @@ if (typeof reportCompare === "function")
|
|||
file.write('\n'.join(lines))
|
||||
file.write('\n};\n')
|
||||
|
||||
dump(index1, 'index1', data_file)
|
||||
data_file.write('\n')
|
||||
dump(index2, 'index2', data_file)
|
||||
data_file.write('\n')
|
||||
file_name = 'Unicode.cpp'
|
||||
with io.open(file_name, 'wb') as data_file:
|
||||
data_file.write(warning_message)
|
||||
data_file.write(unicode_version_message.format(version))
|
||||
data_file.write(casefold_version_message.format(casefold_version))
|
||||
data_file.write(public_domain)
|
||||
data_file.write('#include "vm/Unicode.h"\n\n')
|
||||
data_file.write('using namespace js;\n')
|
||||
data_file.write('using namespace js::unicode;\n')
|
||||
data_file.write(comment)
|
||||
data_file.write('const CharacterInfo unicode::js_charinfo[] = {\n')
|
||||
for d in table:
|
||||
data_file.write(' {')
|
||||
data_file.write(', '.join((str(e) for e in d)))
|
||||
data_file.write('},\n')
|
||||
data_file.write('};\n')
|
||||
data_file.write('\n')
|
||||
|
||||
data_file.write('const CodepointsWithSameUpperCaseInfo unicode::js_codepoints_with_same_upper_info[] = {\n')
|
||||
for d in same_upper_table:
|
||||
data_file.write(' {')
|
||||
data_file.write(', '.join((str(e) for e in d)))
|
||||
data_file.write('},\n')
|
||||
data_file.write('};\n')
|
||||
data_file.write('\n')
|
||||
dump(index1, 'index1', data_file)
|
||||
data_file.write('\n')
|
||||
dump(index2, 'index2', data_file)
|
||||
data_file.write('\n')
|
||||
|
||||
dump(same_upper_index1, 'codepoints_with_same_upper_index1', data_file)
|
||||
data_file.write('\n')
|
||||
dump(same_upper_index2, 'codepoints_with_same_upper_index2', data_file)
|
||||
data_file.write('\n')
|
||||
data_file.write('const CodepointsWithSameUpperCaseInfo unicode::js_codepoints_with_same_upper_info[] = {\n')
|
||||
for d in same_upper_table:
|
||||
data_file.write(' {')
|
||||
data_file.write(', '.join((str(e) for e in d)))
|
||||
data_file.write('},\n')
|
||||
data_file.write('};\n')
|
||||
data_file.write('\n')
|
||||
|
||||
data_file.write('const FoldingInfo unicode::js_foldinfo[] = {\n')
|
||||
for d in folding_table:
|
||||
data_file.write(' {')
|
||||
data_file.write(', '.join((str(e) for e in d)))
|
||||
data_file.write('},\n')
|
||||
data_file.write('};\n')
|
||||
data_file.write('\n')
|
||||
dump(same_upper_index1, 'codepoints_with_same_upper_index1', data_file)
|
||||
data_file.write('\n')
|
||||
dump(same_upper_index2, 'codepoints_with_same_upper_index2', data_file)
|
||||
data_file.write('\n')
|
||||
|
||||
dump(folding_index1, 'folding_index1', data_file)
|
||||
data_file.write('\n')
|
||||
dump(folding_index2, 'folding_index2', data_file)
|
||||
data_file.write('\n')
|
||||
data_file.write('const FoldingInfo unicode::js_foldinfo[] = {\n')
|
||||
for d in folding_table:
|
||||
data_file.write(' {')
|
||||
data_file.write(', '.join((str(e) for e in d)))
|
||||
data_file.write('},\n')
|
||||
data_file.write('};\n')
|
||||
data_file.write('\n')
|
||||
|
||||
data_file.write('\n')
|
||||
dump(folding_index1, 'folding_index1', data_file)
|
||||
data_file.write('\n')
|
||||
dump(folding_index2, 'folding_index2', data_file)
|
||||
data_file.write('\n')
|
||||
|
||||
def getsize(data):
|
||||
""" return smallest possible integer size for the given array """
|
||||
|
@ -648,38 +718,124 @@ def splitbins(t):
|
|||
assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
|
||||
return best
|
||||
|
||||
if __name__ == '__main__':
|
||||
def update_unicode(args):
|
||||
import urllib2
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
print('Always make sure you have the newest UnicodeData.txt!')
|
||||
unicode_data = open(sys.argv[1], 'r')
|
||||
else:
|
||||
print('Downloading UnicodeData.txt...')
|
||||
reader = urllib2.urlopen('http://unicode.org/Public/UNIDATA/UnicodeData.txt')
|
||||
data = reader.read()
|
||||
reader.close()
|
||||
unicode_data = open('UnicodeData.txt', 'w+')
|
||||
unicode_data.write(data)
|
||||
unicode_data.seek(0)
|
||||
def to_download_url(version):
    """Return the unicode.org directory URL that holds the files for *version*.

    The special value 'UNIDATA' (the latest published version) lives
    directly under the public root; every other version keeps its data
    files in a 'ucd' sub-directory.
    """
    baseurl = 'http://unicode.org/Public'
    # Compare by value, not identity: a string that comes from the command
    # line is not guaranteed to be the same object as the literal, so
    # "is" could silently send 'UNIDATA' down the '/ucd' branch.
    if version == 'UNIDATA':
        return '%s/%s' % (baseurl, version)
    return '%s/%s/ucd' % (baseurl, version)
|
||||
|
||||
if len(sys.argv) > 2:
|
||||
print('Always make sure you have the newest CaseFolding.txt!')
|
||||
case_folding = open(sys.argv[2], 'r')
|
||||
else:
|
||||
print('Downloading CaseFolding.txt...')
|
||||
reader = urllib2.urlopen('http://unicode.org/Public/UNIDATA/CaseFolding.txt')
|
||||
data = reader.read()
|
||||
reader.close()
|
||||
case_folding = open('CaseFolding.txt', 'w+')
|
||||
case_folding.write(data)
|
||||
case_folding.seek(0)
|
||||
unicode_info = {
|
||||
'name': 'Unicode',
|
||||
'version': args.version,
|
||||
'url': to_download_url(args.version),
|
||||
}
|
||||
# TODO: Remove this dict and use a single Unicode version when bug 1230490 has relanded.
|
||||
casefold_info = {
|
||||
'name': 'Casefold Unicode',
|
||||
'version': args.casefold_version,
|
||||
'url': to_download_url(args.casefold_version),
|
||||
}
|
||||
|
||||
def print_info(info):
|
||||
if info['version'] is not None:
|
||||
print('\t%s version: %s' % (info['name'], info['version']))
|
||||
print('\t%s download url: %s' % (info['name'], info['url']))
|
||||
else:
|
||||
print('\t%s uses local files.' % info['name'])
|
||||
print('\tAlways make sure you have the newest Unicode files!')
|
||||
|
||||
print('Arguments:')
|
||||
print_info(unicode_info)
|
||||
print_info(casefold_info)
|
||||
print('')
|
||||
|
||||
def download_or_open(info, fname):
|
||||
tfile_path = os.path.join(os.getcwd(), fname)
|
||||
if info['version'] is not None:
|
||||
print('Downloading %s...' % fname)
|
||||
unicode_data_url = '%s/%s' % (info['url'], fname)
|
||||
with closing(urllib2.urlopen(unicode_data_url)) as reader:
|
||||
data = reader.read()
|
||||
tfile = io.open(tfile_path, 'w+b')
|
||||
tfile.write(data)
|
||||
tfile.flush()
|
||||
tfile.seek(0)
|
||||
else:
|
||||
if not os.path.isfile(tfile_path):
|
||||
raise RuntimeError('File not found: %s' % tfile_path)
|
||||
tfile = io.open(tfile_path, 'rb');
|
||||
return tfile
|
||||
|
||||
def version_from_file(f, fname):
    """Return the Unicode version named in the header line of a data file.

    Reads (and consumes) the first line of the open file *f*, which is
    expected to start with "# <fname>-<major>.<minor>.<patch>.txt", and
    returns the "<major>.<minor>.<patch>" part as a string.

    Raises RuntimeError if the first line does not have that form.
    """
    # Escape the file name and the dot before "txt" so that only the
    # literal header is accepted.
    pat_version = re.compile(
        r"# %s-(?P<version>\d+\.\d+\.\d+)\.txt" % re.escape(fname))

    header = f.readline()
    # Files may be opened in binary mode; where bytes and str are distinct
    # types the str pattern above cannot be applied to bytes directly.
    if isinstance(header, bytes) and not isinstance(header, str):
        header = header.decode('utf-8')

    found = pat_version.match(header)
    if found is None:
        raise RuntimeError(
            'Could not find a Unicode version in the first line of %s.txt: %r'
            % (fname, header))
    return found.group("version")
|
||||
|
||||
with download_or_open(unicode_info, 'UnicodeData.txt') as unicode_data, \
|
||||
download_or_open(casefold_info, 'CaseFolding.txt') as case_folding, \
|
||||
download_or_open(unicode_info, 'DerivedCoreProperties.txt') as derived_core_properties:
|
||||
version = version_from_file(derived_core_properties, 'DerivedCoreProperties')
|
||||
casefold_version = version_from_file(case_folding, 'CaseFolding')
|
||||
|
||||
print('Processing...')
|
||||
(
|
||||
table, index,
|
||||
same_upper_table, same_upper_index,
|
||||
non_bmp_lower_map, non_bmp_upper_map,
|
||||
test_table, test_space_table
|
||||
) = process_unicode_data(unicode_data)
|
||||
(
|
||||
folding_table, folding_index,
|
||||
non_bmp_folding_map, non_bmp_rev_folding_map,
|
||||
folding_tests
|
||||
) = process_case_folding(case_folding)
|
||||
|
||||
print('Generating...')
|
||||
generate_unicode_stuff(unicode_data, case_folding,
|
||||
open('Unicode.cpp', 'w'),
|
||||
open('UnicodeNonBMP.h', 'w'),
|
||||
open('../tests/ecma_5/String/string-upper-lower-mapping.js', 'w'),
|
||||
open('../tests/ecma_6/String/string-code-point-upper-lower-mapping.js', 'w'),
|
||||
open('../tests/ecma_5/String/string-space-trim.js', 'w'),
|
||||
open('../tests/ecma_6/RegExp/unicode-ignoreCase.js', 'w'))
|
||||
make_unicode_file(version, casefold_version,
|
||||
table, index,
|
||||
same_upper_table, same_upper_index,
|
||||
folding_table, folding_index)
|
||||
make_non_bmp_file(version, casefold_version,
|
||||
non_bmp_lower_map, non_bmp_upper_map,
|
||||
non_bmp_folding_map, non_bmp_rev_folding_map)
|
||||
|
||||
make_bmp_mapping_test(version, test_table)
|
||||
make_non_bmp_mapping_test(version, non_bmp_upper_map, non_bmp_lower_map)
|
||||
make_space_test(version, test_space_table)
|
||||
make_icase_test(casefold_version, folding_tests)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import argparse
|
||||
|
||||
# This script must be run from js/src/vm to work correctly.
|
||||
if '/'.join(os.path.normpath(os.getcwd()).split(os.sep)[-3:]) != 'js/src/vm':
|
||||
raise RuntimeError('%s must be run from js/src/vm' % sys.argv[0])
|
||||
|
||||
# !!! IMPORTANT !!!
|
||||
# We currently use two different Unicode versions (6.2 and 8.0) for
|
||||
# separate parts of the engine. This is all just temporary until
|
||||
# bug 1230490 has relanded. As soon as bug 1230490 has relanded, this
|
||||
# script can be simplified by removing all logic to handle different
|
||||
# Unicode versions.
|
||||
|
||||
parser = argparse.ArgumentParser(description='Update Unicode data.')
|
||||
|
||||
parser.add_argument('--version',
|
||||
help='Optional Unicode version number. If specified, downloads the\
|
||||
selected version from <http://unicode.org/Public>. If not specified\
|
||||
uses the existing local files to generate the Unicode data. The\
|
||||
number must match a published Unicode version, e.g. use\
|
||||
"--version=8.0.0" to download Unicode 8 files. Alternatively use\
|
||||
"--version=UNIDATA" to download the latest published version.')
|
||||
# TODO: Remove this parameter when bug 1230490 has relanded.
|
||||
parser.add_argument('--casefold-version',
|
||||
help='Unicode version number for case-folding data. Has the same meaning\
|
||||
as --version, except only used for case-folding data.')
|
||||
|
||||
parser.set_defaults(func=update_unicode)
|
||||
|
||||
args = parser.parse_args()
|
||||
args.func(args)
|
||||
|
|
Загрузка…
Ссылка в новой задаче