Bug 917436 - Part 1: Record Unicode version in files generated from make_unicode.py. r=arai

--HG-- extra : rebase_source : e658e60d2d248456c3b748a476713125f0324cae
2016-11-07 16:14:27 +01:00 · 2016-11-07 16:14:27 +01:00 · 2a6d585066
--- a/js/src/tests/ecma_5/String/string-space-trim.js
+++ b/js/src/tests/ecma_5/String/string-space-trim.js
@ -1,4 +1,5 @@
 /* Generated by make_unicode.py DO NOT MODIFY */
+/* Unicode version: 6.2.0 */

 /*
 * Any copyright is dedicated to the Public Domain.
--- a/js/src/tests/ecma_5/String/string-upper-lower-mapping.js
+++ b/js/src/tests/ecma_5/String/string-upper-lower-mapping.js
@ -1,4 +1,5 @@
 /* Generated by make_unicode.py DO NOT MODIFY */
+/* Unicode version: 6.2.0 */

 /*
 * Any copyright is dedicated to the Public Domain.
--- a/js/src/tests/ecma_6/RegExp/unicode-ignoreCase.js
+++ b/js/src/tests/ecma_6/RegExp/unicode-ignoreCase.js
@ -1,4 +1,5 @@
 /* Generated by make_unicode.py DO NOT MODIFY */
+/* Unicode version: 8.0.0 */

 /*
 * Any copyright is dedicated to the Public Domain.
--- a/js/src/tests/ecma_6/String/string-code-point-upper-lower-mapping.js
+++ b/js/src/tests/ecma_6/String/string-code-point-upper-lower-mapping.js
@ -1,4 +1,5 @@
 /* Generated by make_unicode.py DO NOT MODIFY */
+/* Unicode version: 6.2.0 */

 /*
 * Any copyright is dedicated to the Public Domain.
--- a/js/src/vm/DerivedCoreProperties.txt
+++ b/js/src/vm/DerivedCoreProperties.txt
--- a/js/src/vm/Unicode.cpp
+++ b/js/src/vm/Unicode.cpp
@ -1,4 +1,6 @@
 /* Generated by make_unicode.py DO NOT MODIFY */
+/* Unicode version: 6.2.0 */
+/* Casefold Unicode version: 8.0.0 */

 /*
 * Any copyright is dedicated to the Public Domain.
@ -1523,4 +1525,3 @@ const uint8_t unicode::folding_index2[] = {
      0,   0,   0,   0,   0,   0,
 };

-
--- a/js/src/vm/UnicodeNonBMP.h
+++ b/js/src/vm/UnicodeNonBMP.h
@ -5,6 +5,8 @@
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

 /* Generated by make_unicode.py DO NOT MODIFY */
+/* Unicode version: 6.2.0 */
+/* Casefold Unicode version: 8.0.0 */

 #ifndef vm_UnicodeNonBMP_h
 #define vm_UnicodeNonBMP_h
--- a/js/src/vm/make_unicode.py
+++ b/js/src/vm/make_unicode.py
@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # Based upon makeunicodedata.py
 # (http://hg.python.org/cpython/file/c8192197d23d/Tools/unicode/makeunicodedata.py)
@ -20,9 +21,14 @@

 from __future__ import print_function
 import csv
+import io
+import re
+import os
 import sys
+from contextlib import closing

-# ECMAScript 5 $ 7.2
+# ECMAScript 2016
+# §11.2 White Space
 whitespace = [
    # python doesn't support using control character names :(
    0x9, # CHARACTER TABULATION
@ -33,7 +39,7 @@ whitespace = [
    ord(u'\N{ZERO WIDTH NO-BREAK SPACE}'), # also BOM
 ]

-# $ 7.3
+# §11.3 Line Terminators
 line_terminator = [
    0xa, # LINE FEED
    0xd, # CARRIAGE RETURN
@ -41,15 +47,17 @@ line_terminator = [
    ord(u'\N{PARAGRAPH SEPARATOR}'),
 ]

-# These are also part of IdentifierPart $7.6
-ZWNJ = ord(u'\N{ZERO WIDTH NON-JOINER}')
-ZWJ = ord(u'\N{ZERO WIDTH JOINER}')
+# These are also part of IdentifierPart §11.6 Names and Keywords
+compatibility_identifier_part = [
+    ord(u'\N{ZERO WIDTH NON-JOINER}'),
+    ord(u'\N{ZERO WIDTH JOINER}'),
+]

 FLAG_SPACE = 1 << 0
 FLAG_LETTER = 1 << 1
 FLAG_IDENTIFIER_PART = 1 << 2

-MAX = 0xffff
+MAX_BMP = 0xffff

 public_domain = """
 /*
@ -58,7 +66,27 @@ public_domain = """
 */
 """

-def read_unicode_data(unicode_file):
+mpl_license = """\
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+"""
+
+warning_message = """\
+/* Generated by make_unicode.py DO NOT MODIFY */
+"""
+
+unicode_version_message = """\
+/* Unicode version: {0} */
+"""
+
+casefold_version_message = """\
+/* Casefold Unicode version: {0} */
+"""
+
+def read_unicode_data(unicode_data):
    """
        If you want to understand how this wonderful file format works checkout
          Unicode Standard Annex #44 - Unicode Character Database
@ -137,34 +165,22 @@ def make_non_bmp_convert_macro(out_file, name, convert_map):
    out_file.write(' \\\n'.join(lines))
    out_file.write('\n')

-def generate_unicode_stuff(unicode_data, case_folding,
-                           data_file, non_bmp_file,
-                           test_mapping, test_non_bmp_mapping,
-                           test_space, test_icase):
+def process_unicode_data(unicode_data):
    dummy = (0, 0, 0)
    table = [dummy]
    cache = {dummy: 0}
-    index = [0] * (MAX + 1)
+    index = [0] * (MAX_BMP + 1)
    same_upper_map = {}
    same_upper_dummy = (0, 0, 0)
    same_upper_table = [same_upper_dummy]
    same_upper_cache = {same_upper_dummy: 0}
-    same_upper_index = [0] * (MAX + 1)
-    folding_map = {}
-    rev_folding_map = {}
-    folding_dummy = (0, 0, 0, 0)
-    folding_table = [folding_dummy]
-    folding_cache = {folding_dummy: 0}
-    folding_index = [0] * (MAX + 1)
+    same_upper_index = [0] * (MAX_BMP + 1)
+
    test_table = {}
    test_space_table = []
-    folding_tests = []
-    folding_codes = set()

    non_bmp_lower_map = {}
    non_bmp_upper_map = {}
-    non_bmp_folding_map = {}
-    non_bmp_rev_folding_map = {}

    for row in read_unicode_data(unicode_data):
        code = row[0]
@ -190,7 +206,7 @@ def generate_unicode_stuff(unicode_data, case_folding,
        else:
            lower = code

-        if code > MAX:
+        if code > MAX_BMP:
            if code != lower:
                non_bmp_lower_map[code] = lower
            if code != upper:
@ -201,9 +217,13 @@ def generate_unicode_stuff(unicode_data, case_folding,
        if category == 'Zs' or code in whitespace or code in line_terminator:
            flags |= FLAG_SPACE
            test_space_table.append(code)
-        if category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']: # $ 7.6 (UnicodeLetter)
+
+        # §7.6 (UnicodeLetter)
+        if category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']:
            flags |= FLAG_LETTER
-        if category in ['Mn', 'Mc', 'Nd', 'Pc'] or code == ZWNJ or code == ZWJ: # $ 7.6 (IdentifierPart)
+
+        # §7.6 (IdentifierPart)
+        if category in ['Mn', 'Mc', 'Nd', 'Pc'] or code in compatibility_identifier_part:
            flags |= FLAG_IDENTIFIER_PART

        test_table[code] = (upper, lower, name, alias)
@ -226,7 +246,7 @@ def generate_unicode_stuff(unicode_data, case_folding,
            table.append(item)
        index[code] = i

-    for code in range(0, MAX + 1):
+    for code in range(0, MAX_BMP + 1):
        entry = test_table.get(code)

        if not entry:
@ -256,12 +276,33 @@ def generate_unicode_stuff(unicode_data, case_folding,
            same_upper_table.append(item)
        same_upper_index[code] = i

+    return (
+        table, index,
+        same_upper_table, same_upper_index,
+        non_bmp_lower_map, non_bmp_upper_map,
+        test_table, test_space_table,
+    )
+
+def process_case_folding(case_folding):
+    folding_map = {}
+    rev_folding_map = {}
+    folding_dummy = (0, 0, 0, 0)
+    folding_table = [folding_dummy]
+    folding_cache = {folding_dummy: 0}
+    folding_index = [0] * (MAX_BMP + 1)
+
+    folding_tests = []
+    folding_codes = set()
+
+    non_bmp_folding_map = {}
+    non_bmp_rev_folding_map = {}
+
    for row in read_case_folding(case_folding):
        code = row[0]
        mapping = row[2]
        folding_map[code] = mapping

-        if code > MAX:
+        if code > MAX_BMP:
            non_bmp_folding_map[code] = mapping
            non_bmp_rev_folding_map[mapping] = code

@ -294,7 +335,7 @@ def generate_unicode_stuff(unicode_data, case_folding,
                item.append(folding)
            folding_tests.append(item + rev_folding)

-        if code > MAX:
+        if code > MAX_BMP:
            continue

        folding_d = folding - code
@ -317,46 +358,58 @@ def generate_unicode_stuff(unicode_data, case_folding,
            folding_cache[item] = i = len(folding_table)
            folding_table.append(item)
        folding_index[code] = i
+    return (
+        folding_table, folding_index,
+        non_bmp_folding_map, non_bmp_rev_folding_map,
+        folding_tests
+    )

-    non_bmp_file.write("""/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
- * vim: set ts=8 sts=4 et sw=4 tw=99:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-/* Generated by make_unicode.py DO NOT MODIFY */
-
+def make_non_bmp_file(version, casefold_version,
+                      non_bmp_lower_map, non_bmp_upper_map,
+                      non_bmp_folding_map, non_bmp_rev_folding_map):
+    file_name = 'UnicodeNonBMP.h';
+    with io.open(file_name, mode='wb') as non_bmp_file:
+        non_bmp_file.write(mpl_license)
+        non_bmp_file.write('\n')
+        non_bmp_file.write(warning_message)
+        non_bmp_file.write(unicode_version_message.format(version))
+        non_bmp_file.write(casefold_version_message.format(casefold_version))
+        non_bmp_file.write("""
 #ifndef vm_UnicodeNonBMP_h
 #define vm_UnicodeNonBMP_h

 """)

-    make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map)
-    non_bmp_file.write('\n')
-    make_non_bmp_convert_macro(non_bmp_file, 'UPPERCASE', non_bmp_upper_map)
-    non_bmp_file.write('\n')
-    make_non_bmp_convert_macro(non_bmp_file, 'CASE_FOLDING', non_bmp_folding_map)
-    non_bmp_file.write('\n')
-    make_non_bmp_convert_macro(non_bmp_file, 'REV_CASE_FOLDING', non_bmp_rev_folding_map)
+        make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map)
+        non_bmp_file.write('\n')
+        make_non_bmp_convert_macro(non_bmp_file, 'UPPERCASE', non_bmp_upper_map)
+        non_bmp_file.write('\n')
+        make_non_bmp_convert_macro(non_bmp_file, 'CASE_FOLDING', non_bmp_folding_map)
+        non_bmp_file.write('\n')
+        make_non_bmp_convert_macro(non_bmp_file, 'REV_CASE_FOLDING', non_bmp_rev_folding_map)

-    non_bmp_file.write("""
+        non_bmp_file.write("""
 #endif /* vm_UnicodeNonBMP_h */
 """)

-    test_mapping.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
-    test_mapping.write(public_domain)
-    test_mapping.write('var mapping = [\n')
-    for code in range(0, MAX + 1):
-        entry = test_table.get(code)
+def make_bmp_mapping_test(version, test_table):
+    file_name = '../tests/ecma_5/String/string-upper-lower-mapping.js'
+    with io.open(file_name, mode='wb') as test_mapping:
+        test_mapping.write(warning_message)
+        test_mapping.write(unicode_version_message.format(version))
+        test_mapping.write(public_domain)
+        test_mapping.write('var mapping = [\n')
+        for code in range(0, MAX_BMP + 1):
+            entry = test_table.get(code)

-        if entry:
-            (upper, lower, name, alias) = entry
-            test_mapping.write('  [' + hex(upper) + ', ' + hex(lower) + '], /* ' +
-                       name + (' (' + alias + ')' if alias else '') + ' */\n')
-        else:
-            test_mapping.write('  [' + hex(code) + ', ' + hex(code) + '],\n')
-    test_mapping.write('];')
-    test_mapping.write("""
+            if entry:
+                (upper, lower, name, alias) = entry
+                test_mapping.write('  [' + hex(upper) + ', ' + hex(lower) + '], /* ' +
+                        name + (' (' + alias + ')' if alias else '') + ' */\n')
+            else:
+                test_mapping.write('  [' + hex(code) + ', ' + hex(code) + '],\n')
+        test_mapping.write('];')
+        test_mapping.write("""
 assertEq(mapping.length, 0x10000);
 for (var i = 0; i <= 0xffff; i++) {
    var char = String.fromCharCode(i);
@ -370,28 +423,35 @@ if (typeof reportCompare === "function")
    reportCompare(true, true);
 """)

-    test_non_bmp_mapping.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
-    test_non_bmp_mapping.write(public_domain)
-    for code in sorted(non_bmp_upper_map.keys()):
-        test_non_bmp_mapping.write("""\
+def make_non_bmp_mapping_test(version, non_bmp_upper_map, non_bmp_lower_map):
+    file_name = '../tests/ecma_6/String/string-code-point-upper-lower-mapping.js'
+    with io.open(file_name, mode='wb') as test_non_bmp_mapping:
+        test_non_bmp_mapping.write(warning_message)
+        test_non_bmp_mapping.write(unicode_version_message.format(version))
+        test_non_bmp_mapping.write(public_domain)
+        for code in sorted(non_bmp_upper_map.keys()):
+            test_non_bmp_mapping.write("""\
 assertEq(String.fromCodePoint(0x{:x}).toUpperCase().codePointAt(0), 0x{:x});
 """.format(code, non_bmp_upper_map[code]))
-    for code in sorted(non_bmp_lower_map.keys()):
-        test_non_bmp_mapping.write("""\
+        for code in sorted(non_bmp_lower_map.keys()):
+            test_non_bmp_mapping.write("""\
 assertEq(String.fromCodePoint(0x{:x}).toLowerCase().codePointAt(0), 0x{:x});
 """.format(code, non_bmp_lower_map[code]))

-    test_non_bmp_mapping.write("""
+        test_non_bmp_mapping.write("""
 if (typeof reportCompare === "function")
    reportCompare(true, true);
 """)

-
-    test_space.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
-    test_space.write(public_domain)
-    test_space.write('var onlySpace = String.fromCharCode(' +
-                     ', '.join(map(lambda c: hex(c), test_space_table)) + ');\n')
-    test_space.write("""
+def make_space_test(version, test_space_table):
+    file_name = '../tests/ecma_5/String/string-space-trim.js'
+    with io.open(file_name, mode='wb') as test_space:
+        test_space.write(warning_message)
+        test_space.write(unicode_version_message.format(version))
+        test_space.write(public_domain)
+        test_space.write('var onlySpace = String.fromCharCode(' +
+                        ', '.join(map(lambda c: hex(c), test_space_table)) + ');\n')
+        test_space.write("""
 assertEq(onlySpace.trim(), "");
 assertEq((onlySpace + 'aaaa').trim(), 'aaaa');
 assertEq(('aaaa' + onlySpace).trim(), 'aaaa');
@ -401,9 +461,13 @@ if (typeof reportCompare === "function")
    reportCompare(true, true);
 """)

-    test_icase.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
-    test_icase.write(public_domain)
-    test_icase.write("""
+def make_icase_test(version, folding_tests):
+    file_name = '../tests/ecma_6/RegExp/unicode-ignoreCase.js'
+    with io.open(file_name, mode='wb') as test_icase:
+        test_icase.write(warning_message)
+        test_icase.write(unicode_version_message.format(version))
+        test_icase.write(public_domain)
+        test_icase.write("""
 var BUGNUMBER = 1135377;
 var summary = "Implement RegExp unicode flag -- ignoreCase flag.";

@ -417,26 +481,30 @@ function test(code, ...equivs) {
  assertEqArray(codeRe.exec("<" + ans + ">"), [ans]);
 }
 """)
-    for args in folding_tests:
-        test_icase.write('test(' + ','.join([hex(c) for c in args]) + ');\n')
-    test_icase.write("""
+        for args in folding_tests:
+            test_icase.write('test(' + ','.join([hex(c) for c in args]) + ');\n')
+        test_icase.write("""
 if (typeof reportCompare === "function")
    reportCompare(true, true);
 """)

+def make_unicode_file(version, casefold_version,
+                      table, index,
+                      same_upper_table, same_upper_index,
+                      folding_table, folding_index):
    index1, index2, shift = splitbins(index)

-    # Don't forget to update CharInfo in Unicode.cpp if you need to change this
+    # Don't forget to update CharInfo in Unicode.h if you need to change this
    assert shift == 5

    same_upper_index1, same_upper_index2, same_upper_shift = splitbins(same_upper_index)

-    # Don't forget to update CharInfo in Unicode.cpp if you need to change this
+    # Don't forget to update CodepointsWithSameUpperCaseInfo in Unicode.h if you need to change this
    assert same_upper_shift == 6

    folding_index1, folding_index2, folding_shift = splitbins(folding_index)

-    # Don't forget to update CharInfo in Unicode.cpp if you need to change this
+    # Don't forget to update CaseFoldInfo in Unicode.h if you need to change this
    assert folding_shift == 6

    # verify correctness
@ -512,20 +580,6 @@ if (typeof reportCompare === "function")
 *  stop if you found the best shift
 */
 """
-    data_file.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
-    data_file.write(public_domain)
-    data_file.write('#include "vm/Unicode.h"\n\n')
-    data_file.write('using namespace js;\n')
-    data_file.write('using namespace js::unicode;\n')
-    data_file.write(comment)
-    data_file.write('const CharacterInfo unicode::js_charinfo[] = {\n')
-    for d in table:
-        data_file.write('    {')
-        data_file.write(', '.join((str(e) for e in d)))
-        data_file.write('},\n')
-    data_file.write('};\n')
-    data_file.write('\n')
-
    def dump(data, name, file):
        file.write('const uint8_t unicode::' + name + '[] = {\n')

@ -546,38 +600,54 @@ if (typeof reportCompare === "function")
        file.write('\n'.join(lines))
        file.write('\n};\n')

-    dump(index1, 'index1', data_file)
-    data_file.write('\n')
-    dump(index2, 'index2', data_file)
-    data_file.write('\n')
+    file_name = 'Unicode.cpp'
+    with io.open(file_name, 'wb') as data_file:
+        data_file.write(warning_message)
+        data_file.write(unicode_version_message.format(version))
+        data_file.write(casefold_version_message.format(casefold_version))
+        data_file.write(public_domain)
+        data_file.write('#include "vm/Unicode.h"\n\n')
+        data_file.write('using namespace js;\n')
+        data_file.write('using namespace js::unicode;\n')
+        data_file.write(comment)
+        data_file.write('const CharacterInfo unicode::js_charinfo[] = {\n')
+        for d in table:
+            data_file.write('    {')
+            data_file.write(', '.join((str(e) for e in d)))
+            data_file.write('},\n')
+        data_file.write('};\n')
+        data_file.write('\n')

-    data_file.write('const CodepointsWithSameUpperCaseInfo unicode::js_codepoints_with_same_upper_info[] = {\n')
-    for d in same_upper_table:
-        data_file.write('    {')
-        data_file.write(', '.join((str(e) for e in d)))
-        data_file.write('},\n')
-    data_file.write('};\n')
-    data_file.write('\n')
+        dump(index1, 'index1', data_file)
+        data_file.write('\n')
+        dump(index2, 'index2', data_file)
+        data_file.write('\n')

-    dump(same_upper_index1, 'codepoints_with_same_upper_index1', data_file)
-    data_file.write('\n')
-    dump(same_upper_index2, 'codepoints_with_same_upper_index2', data_file)
-    data_file.write('\n')
+        data_file.write('const CodepointsWithSameUpperCaseInfo unicode::js_codepoints_with_same_upper_info[] = {\n')
+        for d in same_upper_table:
+            data_file.write('    {')
+            data_file.write(', '.join((str(e) for e in d)))
+            data_file.write('},\n')
+        data_file.write('};\n')
+        data_file.write('\n')

-    data_file.write('const FoldingInfo unicode::js_foldinfo[] = {\n')
-    for d in folding_table:
-        data_file.write('    {')
-        data_file.write(', '.join((str(e) for e in d)))
-        data_file.write('},\n')
-    data_file.write('};\n')
-    data_file.write('\n')
+        dump(same_upper_index1, 'codepoints_with_same_upper_index1', data_file)
+        data_file.write('\n')
+        dump(same_upper_index2, 'codepoints_with_same_upper_index2', data_file)
+        data_file.write('\n')

-    dump(folding_index1, 'folding_index1', data_file)
-    data_file.write('\n')
-    dump(folding_index2, 'folding_index2', data_file)
-    data_file.write('\n')
+        data_file.write('const FoldingInfo unicode::js_foldinfo[] = {\n')
+        for d in folding_table:
+            data_file.write('    {')
+            data_file.write(', '.join((str(e) for e in d)))
+            data_file.write('},\n')
+        data_file.write('};\n')
+        data_file.write('\n')

-    data_file.write('\n')
+        dump(folding_index1, 'folding_index1', data_file)
+        data_file.write('\n')
+        dump(folding_index2, 'folding_index2', data_file)
+        data_file.write('\n')

 def getsize(data):
    """ return smallest possible integer size for the given array """
@ -648,38 +718,124 @@ def splitbins(t):
        assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
    return best

-if __name__ == '__main__':
+def update_unicode(args):
    import urllib2

-    if len(sys.argv) > 1:
-        print('Always make sure you have the newest UnicodeData.txt!')
-        unicode_data = open(sys.argv[1], 'r')
-    else:
-        print('Downloading UnicodeData.txt...')
-        reader = urllib2.urlopen('http://unicode.org/Public/UNIDATA/UnicodeData.txt')
-        data = reader.read()
-        reader.close()
-        unicode_data = open('UnicodeData.txt', 'w+')
-        unicode_data.write(data)
-        unicode_data.seek(0)
+    def to_download_url(version):
+        baseurl = 'http://unicode.org/Public'
+        if version == 'UNIDATA':  # compare by value; `is` only tests object identity
+            return '%s/%s' % (baseurl, version)
+        return '%s/%s/ucd' % (baseurl, version)

-    if len(sys.argv) > 2:
-        print('Always make sure you have the newest CaseFolding.txt!')
-        case_folding = open(sys.argv[2], 'r')
-    else:
-        print('Downloading CaseFolding.txt...')
-        reader = urllib2.urlopen('http://unicode.org/Public/UNIDATA/CaseFolding.txt')
-        data = reader.read()
-        reader.close()
-        case_folding = open('CaseFolding.txt', 'w+')
-        case_folding.write(data)
-        case_folding.seek(0)
+    unicode_info = {
+        'name': 'Unicode',
+        'version': args.version,
+        'url': to_download_url(args.version),
+    }
+    # TODO: Remove this dict and use a single Unicode version when bug 1230490 has relanded.
+    casefold_info = {
+        'name': 'Casefold Unicode',
+        'version': args.casefold_version,
+        'url': to_download_url(args.casefold_version),
+    }
+
+    def print_info(info):
+        if info['version'] is not None:
+            print('\t%s version: %s' % (info['name'], info['version']))
+            print('\t%s download url: %s' % (info['name'], info['url']))
+        else:
+            print('\t%s uses local files.' % info['name'])
+            print('\tAlways make sure you have the newest Unicode files!')
+
+    print('Arguments:')
+    print_info(unicode_info)
+    print_info(casefold_info)
+    print('')
+
+    def download_or_open(info, fname):  # returns an open binary file positioned at its start
+        tfile_path = os.path.join(os.getcwd(), fname)
+        if info['version'] is not None:
+            print('Downloading %s...' % fname)
+            unicode_data_url = '%s/%s' % (info['url'], fname)
+            with closing(urllib2.urlopen(unicode_data_url)) as reader:
+                data = reader.read()
+            tfile = io.open(tfile_path, 'w+b')
+            tfile.write(data)
+            tfile.flush()
+            tfile.seek(0)
+        else:
+            if not os.path.isfile(tfile_path):
+                raise RuntimeError('File not found: %s' % tfile_path)
+            tfile = io.open(tfile_path, 'rb')
+        return tfile
+
+    def version_from_file(f, fname):  # parse "# <fname>-X.Y.Z.txt" from the next line of f
+        pat_version = re.compile(r"# %s-(?P<version>\d+\.\d+\.\d+)\.txt" % fname)
+        (unicode_version) = pat_version.match(f.readline()).group("version")
+        return unicode_version
+
+    with download_or_open(unicode_info, 'UnicodeData.txt') as unicode_data, \
+         download_or_open(casefold_info, 'CaseFolding.txt') as case_folding, \
+         download_or_open(unicode_info, 'DerivedCoreProperties.txt') as derived_core_properties:
+        version = version_from_file(derived_core_properties, 'DerivedCoreProperties')
+        casefold_version = version_from_file(case_folding, 'CaseFolding')
+
+        print('Processing...')
+        (
+            table, index,
+            same_upper_table, same_upper_index,
+            non_bmp_lower_map, non_bmp_upper_map,
+            test_table, test_space_table
+        ) = process_unicode_data(unicode_data)
+        (
+            folding_table, folding_index,
+            non_bmp_folding_map, non_bmp_rev_folding_map,
+            folding_tests
+        ) = process_case_folding(case_folding)

    print('Generating...')
-    generate_unicode_stuff(unicode_data, case_folding,
-        open('Unicode.cpp', 'w'),
-        open('UnicodeNonBMP.h', 'w'),
-        open('../tests/ecma_5/String/string-upper-lower-mapping.js', 'w'),
-        open('../tests/ecma_6/String/string-code-point-upper-lower-mapping.js', 'w'),
-        open('../tests/ecma_5/String/string-space-trim.js', 'w'),
-        open('../tests/ecma_6/RegExp/unicode-ignoreCase.js', 'w'))
+    make_unicode_file(version, casefold_version,
+                      table, index,
+                      same_upper_table, same_upper_index,
+                      folding_table, folding_index)
+    make_non_bmp_file(version, casefold_version,
+                      non_bmp_lower_map, non_bmp_upper_map,
+                      non_bmp_folding_map, non_bmp_rev_folding_map)
+
+    make_bmp_mapping_test(version, test_table)
+    make_non_bmp_mapping_test(version, non_bmp_upper_map, non_bmp_lower_map)
+    make_space_test(version, test_space_table)
+    make_icase_test(casefold_version, folding_tests)
+
+if __name__ == '__main__':
+    import argparse
+
+    # This script must be run from js/src/vm to work correctly.
+    if '/'.join(os.path.normpath(os.getcwd()).split(os.sep)[-3:]) != 'js/src/vm':
+        raise RuntimeError('%s must be run from js/src/vm' % sys.argv[0])
+
+    # !!! IMPORTANT !!!
+    # We currently use two different Unicode versions (6.2 and 8.0) for
+    # separate parts of the engine. This is all just temporary until
+    # bug 1230490 has relanded. As soon as bug 1230490 has relanded, this
+    # script can be simplified by removing all logic to handle different
+    # Unicode versions.
+
+    parser = argparse.ArgumentParser(description='Update Unicode data.')
+
+    parser.add_argument('--version',
+                        help='Optional Unicode version number. If specified, downloads the\
+                              selected version from <http://unicode.org/Public>. If not specified\
+                              uses the existing local files to generate the Unicode data. The\
+                              number must match a published Unicode version, e.g. use\
+                              "--version=8.0.0" to download Unicode 8 files. Alternatively use\
+                              "--version=UNIDATA" to download the latest published version.')
+    # TODO: Remove this parameter when bug 1230490 has relanded.
+    parser.add_argument('--casefold-version',
+                        help='Unicode version number for case-folding data. Has the same meaning\
+                        as --version, except only used for case-folding data.')
+
+    parser.set_defaults(func=update_unicode)
+
+    args = parser.parse_args()
+    args.func(args)