Bug 1298779 - Port jskwgen to Python. r=jwalden

2016-09-14 12:16:41 +09:00 · 2016-09-14 12:16:41 +09:00 · c0fc17d9f4
--- a/config/recurse.mk
+++ b/config/recurse.mk
@ -171,10 +171,6 @@ endif
 ifeq ($(MOZ_REPLACE_MALLOC_LINKAGE),dummy library)
 mozglue/build/target memory/replace/logalloc/replay/target: memory/replace/dummy/target
 endif
-# js/src/target can end up invoking js/src/host rules (through object files
-# depending on jsautokw.h, which depends on host_jskwgen, and that can't
-# happen at the same time (bug #1146738)
-js/src/target: js/src/host
 endif
 # Most things are built during compile (target/host), but some things happen during export
 # Those need to depend on config/export for system wrappers.
--- a/js/src/jsautokw.py
+++ b/js/src/jsautokw.py
@ -2,21 +2,212 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

-from __future__ import print_function
-
-import os
+import re
 import sys
-import subprocess

-def main(output, exe):
-    # moz.build passes in the exe name without any path, so to run it we need to
-    # prepend the './'
-    run_exe = exe if os.path.isabs(exe) else './%s' % exe
+def read_keyword_list(filename):
+    """Collect the keywords declared by ``macro(...)`` lines of a file.
+
+    Each line of *filename* that consists of a three-argument
+    ``macro(<keyword>, <arg>, <arg>)`` invocation, optionally followed by a
+    trailing backslash, contributes one entry.
+
+    Returns a non-empty list of ``(index, keyword)`` tuples, where ``index``
+    counts the matching lines from zero in file order and ``keyword`` is the
+    text of the first macro argument.
+    """
+    macro_pat = re.compile(r"^\s*macro\(([^,]+), *[^,]+, *[^\)]+\)\s*\\?$")

-    # Use universal_newlines so everything is '\n', which gets converted to
-    # '\r\n' when writing out the file in Windows.
-    data = subprocess.check_output([run_exe], universal_newlines=True)
-    output.write(data)
+    keyword_list = []
+    index = 0
+    with open(filename, 'r') as f:
+        # Note: the loop variable shadows the module-level line() helper, but
+        # only inside this function.
+        for line in f:
+            m = macro_pat.search(line)
+            if m:
+                keyword_list.append((index, m.group(1)))
+                index += 1
+
+    # At least one macro(...) line must have been found.
+    assert(len(keyword_list) != 0)
+
+    return keyword_list
+
+def line(opt, s):
+    """Write *s* to ``opt['output']`` as one line at the current indent.
+
+    The text is prefixed with four spaces per ``opt['indent_level']`` and
+    terminated with a newline.
+    """
+    prefix = '    ' * opt['indent_level']
+    opt['output'].write('{}{}\n'.format(prefix, s))
+
+def indent(opt):
+    """Raise ``opt['indent_level']`` by one level, in place."""
+    opt['indent_level'] = opt['indent_level'] + 1
+
+def dedent(opt):
+    """Lower ``opt['indent_level']`` by one level, in place."""
+    opt['indent_level'] = opt['indent_level'] - 1
+
+def span_and_count_at(keyword_list, column):
+    """Measure the characters found at *column* across *keyword_list*.
+
+    Returns ``(span, count)``: ``span`` is the size of the inclusive range
+    between the lowest and the highest character code seen at that column,
+    and ``count`` is the number of distinct characters seen there.
+    """
+    assert len(keyword_list) != 0
+
+    codes = sorted({ord(keyword[column]) for _, keyword in keyword_list})
+    lowest, highest = codes[0], codes[-1]
+    return highest - lowest + 1, len(codes)
+
+def optimal_switch_column(opt, keyword_list, columns, unprocessed_columns):
+    """Choose which unprocessed column to branch on next.
+
+    Only the first *unprocessed_columns* entries of *columns* are examined.
+
+    Returns ``(position, use_if)``. ``position`` is an index into *columns*
+    (not a column number itself) and ``use_if`` tells the caller whether to
+    emit an ``if`` chain (True) or a ``switch`` (False). The position with
+    the fewest distinct characters wins when that number does not exceed
+    ``opt['use_if_threshold']``; otherwise the position with the narrowest
+    character span is used for a ``switch``. Ties keep the earliest position.
+    """
+    assert len(keyword_list) != 0
+    assert unprocessed_columns != 0
+
+    best_count = best_span = 0
+    best_count_pos = best_span_pos = 0
+
+    for pos in range(unprocessed_columns):
+        span, count = span_and_count_at(keyword_list, columns[pos])
+        assert span != 0
+
+        if span == 1:
+            # Every keyword has the same character in this column.
+            # NOTE(review): this returns position 1, not the position of the
+            # column just examined -- confirm that this is intended.
+            assert count == 1
+            return 1, True
+
+        assert count != 1
+
+        # The first position always seeds both running minima.
+        first = pos == 0
+        if first or span < best_span:
+            best_span, best_span_pos = span, pos
+        if first or count < best_count:
+            best_count, best_count_pos = count, pos
+
+    if best_count <= opt['use_if_threshold']:
+        return best_count_pos, True
+
+    return best_span_pos, False
+
+def split_list_per_column(keyword_list, column):
+    """Group *keyword_list* by the character each keyword has at *column*.
+
+    Returns a list of ``(char, items)`` pairs ordered by character code,
+    where ``items`` holds the ``(index, keyword)`` entries sharing ``char``
+    in their original relative order.
+    """
+    assert len(keyword_list) != 0
+
+    column_dict = {}
+    for item in keyword_list:
+        _, keyword = item
+        column_dict.setdefault(keyword[column], []).append(item)
+
+    # Index into the (char, items) pair instead of unpacking it in the lambda
+    # signature: tuple parameters are a syntax error on Python 3 (PEP 3113).
+    return sorted(column_dict.items(), key=lambda pair: ord(pair[0]))
+
+def generate_letter_switch(opt, unprocessed_columns, keyword_list,
+                           columns=None):
+    """Emit code that tells apart keywords by their characters.
+
+    opt: generator state; this function reads
+        ``opt['char_tail_test_threshold']`` and writes through line(),
+        indent() and dedent().
+    unprocessed_columns: how many character columns are still untested.
+    keyword_list: non-empty list of ``(index, keyword)`` candidates.
+    columns: column numbers, of which the first *unprocessed_columns*
+        entries are the untested ones. When omitted (or empty) it is
+        initialised to ``0 .. unprocessed_columns - 1``. The passed list is
+        never modified.
+
+    With a single candidate left the function emits, depending on how many
+    columns remain, a direct JSKW_GOT_MATCH, a JSKW_TEST_GUESS, or an ``if``
+    comparing every remaining character. With several candidates it branches
+    on the column picked by optimal_switch_column() and recurses into each
+    group, then emits JSKW_NO_MATCH() as the fall-through.
+    """
+    assert len(keyword_list) != 0
+
+    if not columns:
+        # Materialise a real list: an entry is reassigned further down, and
+        # a Python 3 range object does not support item assignment.
+        columns = list(range(0, unprocessed_columns))
+
+    if len(keyword_list) == 1:
+        index, keyword = keyword_list[0]
+
+        if unprocessed_columns == 0:
+            # Every character has already been checked.
+            line(opt, 'JSKW_GOT_MATCH({}) /* {} */'.format(index, keyword))
+            return
+
+        if unprocessed_columns > opt['char_tail_test_threshold']:
+            # Too many characters left for an inlined comparison.
+            line(opt, 'JSKW_TEST_GUESS({}) /* {} */'.format(index, keyword))
+            return
+
+        conds = []
+        for column in columns[0:unprocessed_columns]:
+            quoted = repr(keyword[column])
+            conds.append('JSKW_AT({})=={}'.format(column, quoted))
+
+        line(opt, 'if ({}) {{'.format(' && '.join(conds)))
+
+        indent(opt)
+        line(opt, 'JSKW_GOT_MATCH({}) /* {} */'.format(index, keyword))
+        dedent(opt)
+
+        line(opt, '}')
+        line(opt, 'JSKW_NO_MATCH()')
+        return
+
+    assert unprocessed_columns != 0
+
+    optimal_column_index, use_if = optimal_switch_column(opt, keyword_list,
+                                                         columns,
+                                                         unprocessed_columns)
+    optimal_column = columns[optimal_column_index]
+
+    # Work on a copy so the caller's list is left intact, then overwrite the
+    # chosen slot with the last untested column: the first
+    # (unprocessed_columns - 1) entries are now exactly the columns that the
+    # recursive calls still have to test.
+    columns = columns[:]
+    columns[optimal_column_index] = columns[unprocessed_columns - 1]
+
+    list_per_column = split_list_per_column(keyword_list, optimal_column)
+
+    if not use_if:
+        line(opt, 'switch (JSKW_AT({})) {{'.format(optimal_column))
+
+    for char, keyword_list_per_column in list_per_column:
+        quoted = repr(char)
+        if use_if:
+            line(opt, 'if (JSKW_AT({}) == {}) {{'.format(optimal_column,
+                                                         quoted))
+        else:
+            line(opt, '  case {}:'.format(quoted))
+
+        indent(opt)
+        generate_letter_switch(opt, unprocessed_columns - 1,
+                               keyword_list_per_column, columns)
+        dedent(opt)
+
+        if use_if:
+            line(opt, '}')
+
+    if not use_if:
+        line(opt, '}')
+
+    # Reached when no branch above produced a match.
+    line(opt, 'JSKW_NO_MATCH()')
+
+def split_list_per_length(keyword_list):
+    """Group *keyword_list* by keyword length.
+
+    Returns a list of ``(length, items)`` pairs in ascending length order,
+    where ``items`` holds the ``(index, keyword)`` entries of that length
+    in their original relative order.
+    """
+    assert len(keyword_list) != 0
+
+    length_dict = {}
+    for item in keyword_list:
+        _, keyword = item
+        length_dict.setdefault(len(keyword), []).append(item)
+
+    # Index into the (length, items) pair instead of unpacking it in the
+    # lambda signature: tuple parameters are a syntax error on Python 3
+    # (PEP 3113).
+    return sorted(length_dict.items(), key=lambda pair: pair[0])
+
+def generate_switch(opt, keyword_list):
+    """Emit the complete keyword matcher for *keyword_list*.
+
+    opt: generator state; this function reads ``opt['use_if_threshold']``
+        and writes through line(), indent() and dedent().
+    keyword_list: non-empty list of ``(index, keyword)`` tuples.
+
+    The output starts with a comment listing every keyword, then dispatches
+    on JSKW_LENGTH() -- as an ``if`` chain when the number of distinct
+    lengths does not exceed the threshold, as a ``switch`` otherwise -- and
+    hands each same-length group to generate_letter_switch(). A final
+    JSKW_NO_MATCH() is the fall-through.
+    """
+    assert len(keyword_list) != 0
+
+    line(opt, '/*')
+    line(opt, ' * Generating switch for the list of {} entries:'.format(
+        len(keyword_list)))
+    for _, keyword in keyword_list:
+        line(opt, ' * {}'.format(keyword))
+    line(opt, ' */')
+
+    list_per_length = split_list_per_length(keyword_list)
+
+    # The threshold is inclusive, the same way optimal_switch_column()
+    # applies it to the number of distinct characters.
+    use_if = len(list_per_length) <= opt['use_if_threshold']
+
+    if not use_if:
+        line(opt, 'switch (JSKW_LENGTH()) {')
+
+    for length, keyword_list_per_length in list_per_length:
+        if use_if:
+            line(opt, 'if (JSKW_LENGTH() == {}) {{'.format(length))
+        else:
+            line(opt, '  case {}:'.format(length))
+
+        indent(opt)
+        generate_letter_switch(opt, length, keyword_list_per_length)
+        dedent(opt)
+
+        if use_if:
+            line(opt, '}')
+
+    if not use_if:
+        line(opt, '}')
+    line(opt, 'JSKW_NO_MATCH()')
+
+def main(output, keywords_h):
+    """Generate the keyword matcher for *keywords_h* and write it to *output*.
+
+    output: object with a ``write`` method that receives the generated text.
+    keywords_h: path of the file whose ``macro(...)`` lines list the keywords.
+    """
+    opt = dict(
+        indent_level=1,
+        use_if_threshold=3,
+        char_tail_test_threshold=4,
+        output=output,
+    )
+    generate_switch(opt, read_keyword_list(keywords_h))

 if __name__ == '__main__':
    main(sys.stdout, *sys.argv[1:])
--- a/js/src/jskwgen.cpp
+++ b/js/src/jskwgen.cpp
@ -1,425 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
- * vim: set ts=8 sts=4 et sw=4 tw=99:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include <assert.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "vm/Keywords.h"
-
-static const char * const keyword_list[] = {
-#define KEYWORD_STRING(keyword, name, type) #keyword,
-    FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_STRING)
-#undef KEYWORD_STRING
-};
-
-struct gen_opt {
-    FILE* output;                       /* output file for generated source */
-    unsigned use_if_threshold;          /* max number of choices to generate
-                                           "if" selector instead of "switch" */
-    unsigned char_tail_test_threshold;  /* max number of unprocessed columns
-                                           to use inlined char compare
-                                           for remaining chars and not generic
-                                           string compare code */
-    unsigned indent_level;              /* current source identation level */
-};
-
-static unsigned column_to_compare;
-
-static int
-length_comparator(const void* a, const void* b)
-{
-    const char* str1 = keyword_list[*(unsigned*)a];
-    const char* str2 = keyword_list[*(unsigned*)b];
-    return (int)strlen(str1) - (int)strlen(str2);
-}
-
-static int
-column_comparator(const void* a, const void* b)
-{
-    const char* str1 = keyword_list[*(unsigned*)a];
-    const char* str2 = keyword_list[*(unsigned*)b];
-    return (int)str1[column_to_compare] - (int)str2[column_to_compare];
-}
-
-static unsigned
-count_different_lengths(unsigned indexes[], unsigned nelem)
-{
-    unsigned nlength, current_length, i, l;
-
-    current_length = 0;
-    nlength = 0;
-    for (i = 0; i != nelem; ++i) {
-        l = (unsigned)strlen(keyword_list[indexes[i]]);
-        assert(l != 0);
-        if (current_length != l) {
-            ++nlength;
-            current_length = l;
-        }
-    }
-    return nlength;
-}
-
-static void
-find_char_span_and_count(unsigned indexes[], unsigned nelem, unsigned column,
-                         unsigned* span_result, unsigned* count_result)
-{
-    unsigned i, count;
-    unsigned char c, prev, minc, maxc;
-
-    assert(nelem != 0);
-    minc = maxc = prev = (unsigned char)keyword_list[indexes[0]][column];
-    count = 1;
-    for (i = 1; i != nelem; ++i) {
-        c = (unsigned char)keyword_list[indexes[i]][column];
-        if (prev != c) {
-            prev = c;
-            ++count;
-            if (minc > c) {
-                minc = c;
-            } else if (maxc < c) {
-                maxc = c;
-            }
-        }
-    }
-
-    *span_result = maxc - minc + 1;
-    *count_result = count;
-}
-
-static unsigned
-find_optimal_switch_column(struct gen_opt* opt,
-                           unsigned indexes[], unsigned nelem,
-                           unsigned columns[], unsigned unprocessed_columns,
-                           int* use_if_result)
-{
-    unsigned i;
-    unsigned span, min_span, min_span_index;
-    unsigned nchar, min_nchar, min_nchar_index;
-
-    assert(unprocessed_columns != 0);
-    i = 0;
-    min_nchar = min_span = (unsigned)-1;
-    min_nchar_index = min_span_index = 0;
-    do {
-        column_to_compare = columns[i];
-        qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
-        find_char_span_and_count(indexes, nelem, column_to_compare,
-                                 &span, &nchar);
-        assert(span != 0);
-        if (span == 1) {
-            assert(nchar == 1);
-            *use_if_result = 1;
-            return 1;
-        }
-        assert(nchar != 1);
-        if (min_span > span) {
-            min_span = span;
-            min_span_index = i;
-        }
-        if (min_nchar > nchar) {
-            min_nchar = nchar;
-            min_nchar_index = i;
-        }
-    } while (++i != unprocessed_columns);
-
-    if (min_nchar <= opt->use_if_threshold) {
-        *use_if_result = 1;
-        i = min_nchar_index;
-    } else {
-        *use_if_result = 0;
-        i = min_span_index;
-    }
-
-    /*
-     * Restore order corresponding to i if it was destroyed by
-     * subsequent sort.
-     */
-    if (i != unprocessed_columns - 1) {
-        column_to_compare = columns[i];
-        qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
-    }
-
-    return i;
-}
-
-
-static void
-p(struct gen_opt* opt, const char* format, ...)
-{
-    va_list ap;
-
-    va_start(ap, format);
-    vfprintf(opt->output, format, ap);
-    va_end(ap);
-}
-
-/* Size for '\xxx' where xxx is octal escape */
-#define MIN_QUOTED_CHAR_BUFFER 7
-
-static char*
-qchar(char c, char* quoted_buffer)
-{
-    char* s;
-
-    s = quoted_buffer;
-    *s++ = '\'';
-    switch (c) {
-      case '\n': c = 'n'; goto one_char_escape;
-      case '\r': c = 'r'; goto one_char_escape;
-      case '\t': c = 't'; goto one_char_escape;
-      case '\f': c = 't'; goto one_char_escape;
-      case '\0': c = '0'; goto one_char_escape;
-      case '\'': goto one_char_escape;
-      one_char_escape:
-        *s++ = '\\';
-        break;
-      default:
-        if (!isprint(c)) {
-            *s++ = '\\';
-            *s++ = (char)('0' + (0x3 & (((unsigned char)c) >> 6)));
-            *s++ = (char)('0' + (0x7 & (((unsigned char)c) >> 3)));
-            c = (char)('0' + (0x7 & ((unsigned char)c)));
-        }
-    }
-    *s++ = c;
-    *s++ = '\'';
-    *s = '\0';
-    assert(s + 1 <= quoted_buffer + MIN_QUOTED_CHAR_BUFFER);
-    return quoted_buffer;
-}
-
-static void
-nl(struct gen_opt* opt)
-{
-    putc('\n', opt->output);
-}
-
-static void
-indent(struct gen_opt* opt)
-{
-    unsigned n = opt->indent_level;
-    while (n != 0) {
-        --n;
-        fputs("    ", opt->output);
-    }
-}
-
-static void
-line(struct gen_opt* opt, const char* format, ...)
-{
-    va_list ap;
-
-    indent(opt);
-    va_start(ap, format);
-    vfprintf(opt->output, format, ap);
-    va_end(ap);
-    nl(opt);
-}
-
-static void
-generate_letter_switch_r(struct gen_opt* opt,
-                         unsigned indexes[], unsigned nelem,
-                         unsigned columns[], unsigned unprocessed_columns)
-{
-    char qbuf[MIN_QUOTED_CHAR_BUFFER];
-
-    assert(nelem != 0);
-    if (nelem == 1) {
-        unsigned kw_index = indexes[0];
-        const char* keyword = keyword_list[kw_index];
-
-        if (unprocessed_columns == 0) {
-            line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
-        } else if (unprocessed_columns > opt->char_tail_test_threshold) {
-            line(opt, "JSKW_TEST_GUESS(%u) /* %s */", kw_index, keyword);
-        } else {
-            unsigned i, column;
-
-            indent(opt); p(opt, "if (");
-            for (i = 0; i != unprocessed_columns; ++i) {
-                column = columns[i];
-                qchar(keyword[column], qbuf);
-                p(opt, "%sJSKW_AT(%u)==%s", (i == 0) ? "" : " && ",
-                  column, qbuf);
-            }
-            p(opt, ") {"); nl(opt);
-            ++opt->indent_level;
-            line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
-            --opt->indent_level;
-            line(opt, "}");
-            line(opt, "JSKW_NO_MATCH()");
-        }
-    } else {
-        unsigned optimal_column_index, optimal_column;
-        unsigned i;
-        int use_if;
-        char current;
-
-        assert(unprocessed_columns != 0);
-        optimal_column_index = find_optimal_switch_column(opt, indexes, nelem,
-                                                          columns,
-                                                          unprocessed_columns,
-                                                          &use_if);
-        optimal_column = columns[optimal_column_index];
-        columns[optimal_column_index] = columns[unprocessed_columns - 1];
-
-        if (!use_if)
-            line(opt, "switch (JSKW_AT(%u)) {", optimal_column);
-
-        current = keyword_list[indexes[0]][optimal_column];
-        for (i = 0; i != nelem;) {
-            unsigned same_char_begin = i;
-            char next = current;
-
-            for (++i; i != nelem; ++i) {
-                next = keyword_list[indexes[i]][optimal_column];
-                if (next != current)
-                    break;
-            }
-            qchar(current, qbuf);
-            if (use_if) {
-                line(opt, "if (JSKW_AT(%u) == %s) {", optimal_column, qbuf);
-            } else {
-                line(opt, "  case %s:", qbuf);
-            }
-            ++opt->indent_level;
-            generate_letter_switch_r(opt, indexes + same_char_begin,
-                                     i - same_char_begin,
-                                     columns, unprocessed_columns - 1);
-            --opt->indent_level;
-            if (use_if) {
-                line(opt, "}");
-            }
-            current = next;
-        }
-
-        if (!use_if) {
-            line(opt, "}");
-        }
-
-        columns[optimal_column_index] = optimal_column;
-
-        line(opt, "JSKW_NO_MATCH()");
-    }
-}
-
-static void
-generate_letter_switch(struct gen_opt* opt,
-                       unsigned indexes[], unsigned nelem,
-                       unsigned current_length)
-{
-    unsigned* columns;
-    unsigned i;
-
-    columns = (unsigned*) malloc(sizeof(columns[0]) * current_length);
-    if (!columns) {
-        perror("malloc");
-        exit(EXIT_FAILURE);
-    }
-    for (i = 0; i != current_length; ++i) {
-        columns[i] = i;
-    }
-    generate_letter_switch_r(opt, indexes, nelem, columns, current_length);
-    free(columns);
-}
-
-
-static void
-generate_switch(struct gen_opt* opt)
-{
-    unsigned* indexes;
-    unsigned nlength;
-    unsigned i, current;
-    int use_if;
-    unsigned nelem;
-
-    nelem = sizeof(keyword_list)/sizeof(keyword_list[0]);
-
-    line(opt, "/*");
-    line(opt, " * Generating switch for the list of %u entries:", nelem);
-    for (i = 0; i != nelem; ++i) {
-        line(opt, " * %s", keyword_list[i]);
-    }
-    line(opt, " */");
-
-    indexes = (unsigned*) malloc(sizeof(indexes[0]) * nelem);
-    if (!indexes) {
-        perror("malloc");
-        exit(EXIT_FAILURE);
-    }
-    for (i = 0; i != nelem; ++i)
-        indexes[i] = i;
-    qsort(indexes, nelem, sizeof(indexes[i]), length_comparator);
-    nlength = count_different_lengths(indexes, nelem);
-
-    use_if = (nlength <= opt->use_if_threshold);
-
-    if (!use_if)
-        line(opt, "switch (JSKW_LENGTH()) {");
-
-    current = (unsigned)strlen(keyword_list[indexes[0]]);
-    for (i = 0; i != nelem;) {
-        unsigned same_length_begin = i;
-        unsigned next = current;
-
-        for (++i; i != nelem; ++i) {
-            next = (unsigned)strlen(keyword_list[indexes[i]]);
-            if (next != current)
-                break;
-        }
-        if (use_if) {
-            line(opt, "if (JSKW_LENGTH() == %u) {", current);
-        } else {
-            line(opt, "  case %u:", current);
-        }
-        ++opt->indent_level;
-        generate_letter_switch(opt, indexes + same_length_begin,
-                               i - same_length_begin,
-                               current);
-        --opt->indent_level;
-        if (use_if) {
-            line(opt, "}");
-        }
-        current = next;
-    }
-    if (!use_if)
-        line(opt, "}");
-    line(opt, "JSKW_NO_MATCH()");
-    free(indexes);
-}
-
-int main(int argc, char** argv)
-{
-    struct gen_opt opt;
-
-    if (argc < 2) {
-        opt.output = stdout;
-    } else {
-        opt.output = fopen(argv[1], "w");
-        if (!opt.output) {
-            perror("fopen");
-            exit(EXIT_FAILURE);
-        }
-    }
-    opt.indent_level = 1;
-    opt.use_if_threshold = 3;
-    opt.char_tail_test_threshold = 4;
-
-    generate_switch(&opt);
-
-    if (opt.output != stdout) {
-        if (fclose(opt.output)) {
-            perror("fclose");
-            exit(EXIT_FAILURE);
-        }
-    }
-
-    return EXIT_SUCCESS;
-}
--- a/js/src/moz.build
+++ b/js/src/moz.build
@ -609,15 +609,11 @@ else:
        'perf/pm_stub.cpp'
    ]

-HostSimplePrograms([
-    'host_jskwgen',
-])
-
 GENERATED_FILES += ['jsautokw.h']
 jsautokw = GENERATED_FILES['jsautokw.h']
 jsautokw.script = 'jsautokw.py'
 jsautokw.inputs += [
-    '!host_jskwgen%s' % CONFIG['HOST_BIN_SUFFIX'],
+    'vm/Keywords.h'
 ]

 # JavaScript must be built shared, even for static builds, as it is used by