From 7d64fbda53bcbe7c68096a61eba0208517e2df5e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 24 Apr 2024 14:57:56 -0400 Subject: [PATCH] [ruby/prism] Remove static literals dependence on parser definition https://github.com/ruby/prism/commit/b3e104e8a2 --- prism/extension.c | 2 +- prism/prism.c | 8 +-- prism/static_literals.c | 154 +++++++++++++++++++++++++++++++--------- prism/static_literals.h | 14 ++-- 4 files changed, 133 insertions(+), 45 deletions(-) diff --git a/prism/extension.c b/prism/extension.c index 18f9dcf950..7b3f894478 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -1243,7 +1243,7 @@ static_inspect(int argc, VALUE *argv, VALUE self) { pm_node_t *node = ((pm_program_node_t *) program)->statements->body.nodes[0]; pm_buffer_t buffer = { 0 }; - pm_static_literal_inspect(&buffer, &parser, node); + pm_static_literal_inspect(&buffer, &parser.newline_list, parser.start_line, parser.encoding->name, node); rb_encoding *encoding = rb_enc_find(parser.encoding->name); VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding); diff --git a/prism/prism.c b/prism/prism.c index d2fcf3e372..9c65152a43 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -13152,11 +13152,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) { */ static void pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) { - const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node); + const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node); if (duplicated != NULL) { pm_buffer_t buffer = { 0 }; - pm_static_literal_inspect(&buffer, parser, duplicated); + pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated); pm_diagnostic_list_append_format( &parser->warning_list, @@ -13178,7 +13178,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter */ static void pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) { - if (pm_static_literals_add(parser, literals, node) != NULL) { + if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) { pm_diagnostic_list_append_format( &parser->warning_list, node->location.start, @@ -15678,7 +15678,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca */ static void parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) { - if (pm_static_literals_add(parser, keys, node) != NULL) { + if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE); } } diff --git a/prism/static_literals.c b/prism/static_literals.c index 469bdfd5ea..08f61c81fa 100644 --- a/prism/static_literals.c +++ b/prism/static_literals.c @@ -1,5 +1,21 @@ #include "prism/static_literals.h" +/** + * A small struct used for passing around a subset of the information that is + * stored on the parser. We use this to avoid having static literals explicitly + * depend on the parser struct. + */ +typedef struct { + /** The list of newline offsets to use to calculate line numbers. */ + const pm_newline_list_t *newline_list; + + /** The line number that the parser starts on. */ + int32_t start_line; + + /** The name of the encoding that the parser is using. */ + const char *encoding_name; +} pm_static_literals_metadata_t; + static inline uint32_t murmur_scramble(uint32_t value) { value *= 0xcc9e2d51; @@ -48,7 +64,7 @@ murmur_hash(const uint8_t *key, size_t length) { * these hashes to look for duplicates. */ static uint32_t -node_hash(const pm_parser_t *parser, const pm_node_t *node) { +node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) { switch (PM_NODE_TYPE(node)) { case PM_INTEGER_NODE: { // Integers hash their value. @@ -68,7 +84,7 @@ node_hash(const pm_parser_t *parser, const pm_node_t *node) { } case PM_SOURCE_LINE_NODE: { // Source lines hash their line number. - const pm_line_column_t line_column = pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line); + const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line); const int32_t *value = &line_column.line; return murmur_hash((const uint8_t *) value, sizeof(int32_t)); } @@ -82,14 +98,14 @@ node_hash(const pm_parser_t *parser, const pm_node_t *node) { // is stored as a subnode, we hash that node and then mix in the // fact that this is a rational node. const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric; - return node_hash(parser, numeric) ^ murmur_scramble((uint32_t) node->type); + return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type); } case PM_IMAGINARY_NODE: { // Imaginaries hash their numeric value. Because their numeric value // is stored as a subnode, we hash that node and then mix in the // fact that this is an imaginary node. const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric; - return node_hash(parser, numeric) ^ murmur_scramble((uint32_t) node->type); + return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type); } case PM_STRING_NODE: { // Strings hash their value and mix in their flags so that different @@ -132,7 +148,7 @@ node_hash(const pm_parser_t *parser, const pm_node_t *node) { * and must be able to compare all node types that will be stored in this hash. */ static pm_node_t * -pm_node_hash_insert(pm_node_hash_t *hash, const pm_parser_t *parser, pm_node_t *node, int (*compare)(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right)) { +pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) { // If we are out of space, we need to resize the hash. This will cause all // of the nodes to be rehashed and reinserted into the new hash. if (hash->size * 2 >= hash->capacity) { @@ -152,7 +168,7 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_parser_t *parser, pm_node_t * pm_node_t *node = hash->nodes[index]; if (node != NULL) { - uint32_t index = node_hash(parser, node) & mask; + uint32_t index = node_hash(metadata, node) & mask; new_nodes[index] = node; } } @@ -165,14 +181,14 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_parser_t *parser, pm_node_t * // Now, insert the node into the hash. uint32_t mask = hash->capacity - 1; - uint32_t index = node_hash(parser, node) & mask; + uint32_t index = node_hash(metadata, node) & mask; // We use linear probing to resolve collisions. This means that if the // current index is occupied, we will move to the next index and try again. // We are guaranteed that this will eventually find an empty slot because we // resize the hash when it gets too full. while (hash->nodes[index] != NULL) { - if (compare(parser, hash->nodes[index], node) == 0) break; + if (compare(metadata, hash->nodes[index], node) == 0) break; index = (index + 1) & mask; } @@ -203,7 +219,7 @@ pm_node_hash_free(pm_node_hash_t *hash) { * Return the integer value of the given node as an int64_t. */ static int64_t -pm_int64_value(const pm_parser_t *parser, const pm_node_t *node) { +pm_int64_value(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) { switch (PM_NODE_TYPE(node)) { case PM_INTEGER_NODE: { const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value; @@ -213,7 +229,7 @@ pm_int64_value(const pm_parser_t *parser, const pm_node_t *node) { return integer->negative ? -value : value; } case PM_SOURCE_LINE_NODE: - return (int64_t) pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line; + return (int64_t) pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line; default: assert(false && "unreachable"); return 0; @@ -225,10 +241,10 @@ pm_int64_value(const pm_parser_t *parser, const pm_node_t *node) { * instances. */ static int -pm_compare_integer_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) { +pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { if (PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) || PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE)) { - int64_t left_value = pm_int64_value(parser, left); - int64_t right_value = pm_int64_value(parser, right); + int64_t left_value = pm_int64_value(metadata, left); + int64_t right_value = pm_int64_value(metadata, right); return PM_NUMERIC_COMPARISON(left_value, right_value); } @@ -241,7 +257,7 @@ pm_compare_integer_nodes(const pm_parser_t *parser, const pm_node_t *left, const * A comparison function for comparing two FloatNode instances. */ static int -pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) { +pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { const double left_value = ((const pm_float_node_t *) left)->value; const double right_value = ((const pm_float_node_t *) right)->value; return PM_NUMERIC_COMPARISON(left_value, right_value); @@ -251,20 +267,20 @@ pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const p * A comparison function for comparing two nodes that have attached numbers. */ static int -pm_compare_number_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) { +pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) { return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right)); } switch (PM_NODE_TYPE(left)) { case PM_IMAGINARY_NODE: - return pm_compare_number_nodes(parser, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric); + return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric); case PM_RATIONAL_NODE: - return pm_compare_number_nodes(parser, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric); + return pm_compare_number_nodes(metadata, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric); case PM_INTEGER_NODE: - return pm_compare_integer_nodes(parser, left, right); + return pm_compare_integer_nodes(metadata, left, right); case PM_FLOAT_NODE: - return pm_compare_float_nodes(parser, left, right); + return pm_compare_float_nodes(metadata, left, right); default: assert(false && "unreachable"); return 0; @@ -293,7 +309,7 @@ pm_string_value(const pm_node_t *node) { * A comparison function for comparing two nodes that have attached strings. */ static int -pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) { +pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { const pm_string_t *left_string = pm_string_value(left); const pm_string_t *right_string = pm_string_value(right); return pm_string_compare(left_string, right_string); @@ -303,7 +319,7 @@ pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const * A comparison function for comparing two RegularExpressionNode instances. */ static int -pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) { +pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left; const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right; @@ -319,23 +335,77 @@ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *pa * Add a node to the set of static literals. */ pm_node_t * -pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) { +pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node) { switch (PM_NODE_TYPE(node)) { case PM_INTEGER_NODE: case PM_SOURCE_LINE_NODE: - return pm_node_hash_insert(&literals->integer_nodes, parser, node, pm_compare_integer_nodes); + return pm_node_hash_insert( + &literals->integer_nodes, + &(pm_static_literals_metadata_t) { + .newline_list = newline_list, + .start_line = start_line, + .encoding_name = NULL + }, + node, + pm_compare_integer_nodes + ); case PM_FLOAT_NODE: - return pm_node_hash_insert(&literals->float_nodes, parser, node, pm_compare_float_nodes); + return pm_node_hash_insert( + &literals->float_nodes, + &(pm_static_literals_metadata_t) { + .newline_list = newline_list, + .start_line = start_line, + .encoding_name = NULL + }, + node, + pm_compare_float_nodes + ); case PM_RATIONAL_NODE: case PM_IMAGINARY_NODE: - return pm_node_hash_insert(&literals->number_nodes, parser, node, pm_compare_number_nodes); + return pm_node_hash_insert( + &literals->number_nodes, + &(pm_static_literals_metadata_t) { + .newline_list = newline_list, + .start_line = start_line, + .encoding_name = NULL + }, + node, + pm_compare_number_nodes + ); case PM_STRING_NODE: case PM_SOURCE_FILE_NODE: - return pm_node_hash_insert(&literals->string_nodes, parser, node, pm_compare_string_nodes); + return pm_node_hash_insert( + &literals->string_nodes, + &(pm_static_literals_metadata_t) { + .newline_list = newline_list, + .start_line = start_line, + .encoding_name = NULL + }, + node, + pm_compare_string_nodes + ); case PM_REGULAR_EXPRESSION_NODE: - return pm_node_hash_insert(&literals->regexp_nodes, parser, node, pm_compare_regular_expression_nodes); + return pm_node_hash_insert( + &literals->regexp_nodes, + &(pm_static_literals_metadata_t) { + .newline_list = newline_list, + .start_line = start_line, + .encoding_name = NULL + }, + node, + pm_compare_regular_expression_nodes + ); case PM_SYMBOL_NODE: - return pm_node_hash_insert(&literals->symbol_nodes, parser, node, pm_compare_string_nodes); + return pm_node_hash_insert( + &literals->symbol_nodes, + &(pm_static_literals_metadata_t) { + .newline_list = newline_list, + .start_line = start_line, + .encoding_name = NULL + }, + node, + pm_compare_string_nodes + ); case PM_TRUE_NODE: { pm_node_t *duplicated = literals->true_node; literals->true_node = node; @@ -435,8 +505,8 @@ pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) { /** * Create a string-based representation of the given static literal. */ -PRISM_EXPORTED_FUNCTION void -pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) { +static inline void +pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) { switch (PM_NODE_TYPE(node)) { case PM_FALSE_NODE: pm_buffer_append_string(buffer, "false", 5); @@ -473,7 +543,7 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric; pm_buffer_append_string(buffer, "(0", 2); if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+'); - pm_static_literal_inspect(buffer, parser, numeric); + pm_static_literal_inspect_node(buffer, metadata, numeric); if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*'); pm_buffer_append_string(buffer, "i)", 2); break; @@ -490,7 +560,7 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const switch (PM_NODE_TYPE(numeric)) { case PM_INTEGER_NODE: pm_buffer_append_byte(buffer, '('); - pm_static_literal_inspect(buffer, parser, numeric); + pm_static_literal_inspect_node(buffer, metadata, numeric); pm_buffer_append_string(buffer, "/1)", 3); break; case PM_FLOAT_NODE: @@ -517,7 +587,7 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const break; } case PM_SOURCE_ENCODING_NODE: - pm_buffer_append_format(buffer, "#", parser->encoding->name); + pm_buffer_append_format(buffer, "#", metadata->encoding_name); break; case PM_SOURCE_FILE_NODE: { const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath; @@ -527,7 +597,7 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const break; } case PM_SOURCE_LINE_NODE: - pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line); + pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line); break; case PM_STRING_NODE: { const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped; @@ -550,3 +620,19 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const break; } } + +/** + * Create a string-based representation of the given static literal. + */ +PRISM_EXPORTED_FUNCTION void +pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) { + pm_static_literal_inspect_node( + buffer, + &(pm_static_literals_metadata_t) { + .newline_list = newline_list, + .start_line = start_line, + .encoding_name = encoding_name + }, + node + ); +} diff --git a/prism/static_literals.h b/prism/static_literals.h index dd1f2e7f84..72706054c1 100644 --- a/prism/static_literals.h +++ b/prism/static_literals.h @@ -8,8 +8,7 @@ #include "prism/defines.h" #include "prism/ast.h" -#include "prism/node.h" -#include "prism/parser.h" +#include "prism/util/pm_newline_list.h" #include #include @@ -92,12 +91,13 @@ typedef struct { /** * Add a node to the set of static literals. * - * @param parser The parser that created the node. + * @param newline_list The list of newline offsets to use to calculate lines. + * @param start_line The line number that the parser starts on. * @param literals The set of static literals to add the node to. * @param node The node to add to the set. * @return A pointer to the node that is being overwritten, if there is one. */ -pm_node_t * pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node); +pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node); /** * Free the internal memory associated with the given static literals set. @@ -110,9 +110,11 @@ void pm_static_literals_free(pm_static_literals_t *literals); * Create a string-based representation of the given static literal. * * @param buffer The buffer to write the string to. - * @param parser The parser that created the node. + * @param newline_list The list of newline offsets to use to calculate lines. + * @param start_line The line number that the parser starts on. + * @param encoding_name The name of the encoding of the source being parsed. * @param node The node to create a string representation of. */ -PRISM_EXPORTED_FUNCTION void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node); +PRISM_EXPORTED_FUNCTION void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node); #endif