[ruby/prism] Remove static literals dependence on parser definition

https://github.com/ruby/prism/commit/b3e104e8a2
This commit is contained in:
Kevin Newton 2024-04-24 14:57:56 -04:00 коммит произвёл git
Родитель 214811974b
Коммит 7d64fbda53
4 изменённых файлов: 133 добавлений и 45 удалений

Просмотреть файл

@ -1243,7 +1243,7 @@ static_inspect(int argc, VALUE *argv, VALUE self) {
pm_node_t *node = ((pm_program_node_t *) program)->statements->body.nodes[0];
pm_buffer_t buffer = { 0 };
pm_static_literal_inspect(&buffer, &parser, node);
pm_static_literal_inspect(&buffer, &parser.newline_list, parser.start_line, parser.encoding->name, node);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);

Просмотреть файл

@ -13152,11 +13152,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
*/
static void
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
if (duplicated != NULL) {
pm_buffer_t buffer = { 0 };
pm_static_literal_inspect(&buffer, parser, duplicated);
pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
pm_diagnostic_list_append_format(
&parser->warning_list,
@ -13178,7 +13178,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
*/
static void
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
if (pm_static_literals_add(parser, literals, node) != NULL) {
if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
pm_diagnostic_list_append_format(
&parser->warning_list,
node->location.start,
@ -15678,7 +15678,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
*/
static void
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
if (pm_static_literals_add(parser, keys, node) != NULL) {
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
}
}

Просмотреть файл

@ -1,5 +1,21 @@
#include "prism/static_literals.h"
/**
* A small struct used for passing around a subset of the information that is
* stored on the parser. We use this to avoid having static literals explicitly
* depend on the parser struct.
*/
typedef struct {
/** The list of newline offsets to use to calculate line numbers. */
const pm_newline_list_t *newline_list;
/** The line number that the parser starts on. */
int32_t start_line;
/** The name of the encoding that the parser is using. */
const char *encoding_name;
} pm_static_literals_metadata_t;
static inline uint32_t
murmur_scramble(uint32_t value) {
value *= 0xcc9e2d51;
@ -48,7 +64,7 @@ murmur_hash(const uint8_t *key, size_t length) {
* these hashes to look for duplicates.
*/
static uint32_t
node_hash(const pm_parser_t *parser, const pm_node_t *node) {
node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE: {
// Integers hash their value.
@ -68,7 +84,7 @@ node_hash(const pm_parser_t *parser, const pm_node_t *node) {
}
case PM_SOURCE_LINE_NODE: {
// Source lines hash their line number.
const pm_line_column_t line_column = pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line);
const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line);
const int32_t *value = &line_column.line;
return murmur_hash((const uint8_t *) value, sizeof(int32_t));
}
@ -82,14 +98,14 @@ node_hash(const pm_parser_t *parser, const pm_node_t *node) {
// is stored as a subnode, we hash that node and then mix in the
// fact that this is a rational node.
const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
return node_hash(parser, numeric) ^ murmur_scramble((uint32_t) node->type);
return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
}
case PM_IMAGINARY_NODE: {
// Imaginaries hash their numeric value. Because their numeric value
// is stored as a subnode, we hash that node and then mix in the
// fact that this is an imaginary node.
const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
return node_hash(parser, numeric) ^ murmur_scramble((uint32_t) node->type);
return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
}
case PM_STRING_NODE: {
// Strings hash their value and mix in their flags so that different
@ -132,7 +148,7 @@ node_hash(const pm_parser_t *parser, const pm_node_t *node) {
* and must be able to compare all node types that will be stored in this hash.
*/
static pm_node_t *
pm_node_hash_insert(pm_node_hash_t *hash, const pm_parser_t *parser, pm_node_t *node, int (*compare)(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right)) {
pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
// If we are out of space, we need to resize the hash. This will cause all
// of the nodes to be rehashed and reinserted into the new hash.
if (hash->size * 2 >= hash->capacity) {
@ -152,7 +168,7 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_parser_t *parser, pm_node_t *
pm_node_t *node = hash->nodes[index];
if (node != NULL) {
uint32_t index = node_hash(parser, node) & mask;
uint32_t index = node_hash(metadata, node) & mask;
new_nodes[index] = node;
}
}
@ -165,14 +181,14 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_parser_t *parser, pm_node_t *
// Now, insert the node into the hash.
uint32_t mask = hash->capacity - 1;
uint32_t index = node_hash(parser, node) & mask;
uint32_t index = node_hash(metadata, node) & mask;
// We use linear probing to resolve collisions. This means that if the
// current index is occupied, we will move to the next index and try again.
// We are guaranteed that this will eventually find an empty slot because we
// resize the hash when it gets too full.
while (hash->nodes[index] != NULL) {
if (compare(parser, hash->nodes[index], node) == 0) break;
if (compare(metadata, hash->nodes[index], node) == 0) break;
index = (index + 1) & mask;
}
@ -203,7 +219,7 @@ pm_node_hash_free(pm_node_hash_t *hash) {
* Return the integer value of the given node as an int64_t.
*/
static int64_t
pm_int64_value(const pm_parser_t *parser, const pm_node_t *node) {
pm_int64_value(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE: {
const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
@ -213,7 +229,7 @@ pm_int64_value(const pm_parser_t *parser, const pm_node_t *node) {
return integer->negative ? -value : value;
}
case PM_SOURCE_LINE_NODE:
return (int64_t) pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line;
return (int64_t) pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line;
default:
assert(false && "unreachable");
return 0;
@ -225,10 +241,10 @@ pm_int64_value(const pm_parser_t *parser, const pm_node_t *node) {
* instances.
*/
static int
pm_compare_integer_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
if (PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) || PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE)) {
int64_t left_value = pm_int64_value(parser, left);
int64_t right_value = pm_int64_value(parser, right);
int64_t left_value = pm_int64_value(metadata, left);
int64_t right_value = pm_int64_value(metadata, right);
return PM_NUMERIC_COMPARISON(left_value, right_value);
}
@ -241,7 +257,7 @@ pm_compare_integer_nodes(const pm_parser_t *parser, const pm_node_t *left, const
* A comparison function for comparing two FloatNode instances.
*/
static int
pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
const double left_value = ((const pm_float_node_t *) left)->value;
const double right_value = ((const pm_float_node_t *) right)->value;
return PM_NUMERIC_COMPARISON(left_value, right_value);
@ -251,20 +267,20 @@ pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const p
* A comparison function for comparing two nodes that have attached numbers.
*/
static int
pm_compare_number_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) {
return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right));
}
switch (PM_NODE_TYPE(left)) {
case PM_IMAGINARY_NODE:
return pm_compare_number_nodes(parser, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
case PM_RATIONAL_NODE:
return pm_compare_number_nodes(parser, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric);
return pm_compare_number_nodes(metadata, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric);
case PM_INTEGER_NODE:
return pm_compare_integer_nodes(parser, left, right);
return pm_compare_integer_nodes(metadata, left, right);
case PM_FLOAT_NODE:
return pm_compare_float_nodes(parser, left, right);
return pm_compare_float_nodes(metadata, left, right);
default:
assert(false && "unreachable");
return 0;
@ -293,7 +309,7 @@ pm_string_value(const pm_node_t *node) {
* A comparison function for comparing two nodes that have attached strings.
*/
static int
pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
const pm_string_t *left_string = pm_string_value(left);
const pm_string_t *right_string = pm_string_value(right);
return pm_string_compare(left_string, right_string);
@ -303,7 +319,7 @@ pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const
* A comparison function for comparing two RegularExpressionNode instances.
*/
static int
pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left;
const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right;
@ -319,23 +335,77 @@ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *pa
* Add a node to the set of static literals.
*/
pm_node_t *
pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE:
case PM_SOURCE_LINE_NODE:
return pm_node_hash_insert(&literals->integer_nodes, parser, node, pm_compare_integer_nodes);
return pm_node_hash_insert(
&literals->integer_nodes,
&(pm_static_literals_metadata_t) {
.newline_list = newline_list,
.start_line = start_line,
.encoding_name = NULL
},
node,
pm_compare_integer_nodes
);
case PM_FLOAT_NODE:
return pm_node_hash_insert(&literals->float_nodes, parser, node, pm_compare_float_nodes);
return pm_node_hash_insert(
&literals->float_nodes,
&(pm_static_literals_metadata_t) {
.newline_list = newline_list,
.start_line = start_line,
.encoding_name = NULL
},
node,
pm_compare_float_nodes
);
case PM_RATIONAL_NODE:
case PM_IMAGINARY_NODE:
return pm_node_hash_insert(&literals->number_nodes, parser, node, pm_compare_number_nodes);
return pm_node_hash_insert(
&literals->number_nodes,
&(pm_static_literals_metadata_t) {
.newline_list = newline_list,
.start_line = start_line,
.encoding_name = NULL
},
node,
pm_compare_number_nodes
);
case PM_STRING_NODE:
case PM_SOURCE_FILE_NODE:
return pm_node_hash_insert(&literals->string_nodes, parser, node, pm_compare_string_nodes);
return pm_node_hash_insert(
&literals->string_nodes,
&(pm_static_literals_metadata_t) {
.newline_list = newline_list,
.start_line = start_line,
.encoding_name = NULL
},
node,
pm_compare_string_nodes
);
case PM_REGULAR_EXPRESSION_NODE:
return pm_node_hash_insert(&literals->regexp_nodes, parser, node, pm_compare_regular_expression_nodes);
return pm_node_hash_insert(
&literals->regexp_nodes,
&(pm_static_literals_metadata_t) {
.newline_list = newline_list,
.start_line = start_line,
.encoding_name = NULL
},
node,
pm_compare_regular_expression_nodes
);
case PM_SYMBOL_NODE:
return pm_node_hash_insert(&literals->symbol_nodes, parser, node, pm_compare_string_nodes);
return pm_node_hash_insert(
&literals->symbol_nodes,
&(pm_static_literals_metadata_t) {
.newline_list = newline_list,
.start_line = start_line,
.encoding_name = NULL
},
node,
pm_compare_string_nodes
);
case PM_TRUE_NODE: {
pm_node_t *duplicated = literals->true_node;
literals->true_node = node;
@ -435,8 +505,8 @@ pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) {
/**
* Create a string-based representation of the given static literal.
*/
PRISM_EXPORTED_FUNCTION void
pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
static inline void
pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
case PM_FALSE_NODE:
pm_buffer_append_string(buffer, "false", 5);
@ -473,7 +543,7 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const
const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
pm_buffer_append_string(buffer, "(0", 2);
if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
pm_static_literal_inspect(buffer, parser, numeric);
pm_static_literal_inspect_node(buffer, metadata, numeric);
if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*');
pm_buffer_append_string(buffer, "i)", 2);
break;
@ -490,7 +560,7 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const
switch (PM_NODE_TYPE(numeric)) {
case PM_INTEGER_NODE:
pm_buffer_append_byte(buffer, '(');
pm_static_literal_inspect(buffer, parser, numeric);
pm_static_literal_inspect_node(buffer, metadata, numeric);
pm_buffer_append_string(buffer, "/1)", 3);
break;
case PM_FLOAT_NODE:
@ -517,7 +587,7 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const
break;
}
case PM_SOURCE_ENCODING_NODE:
pm_buffer_append_format(buffer, "#<Encoding:%s>", parser->encoding->name);
pm_buffer_append_format(buffer, "#<Encoding:%s>", metadata->encoding_name);
break;
case PM_SOURCE_FILE_NODE: {
const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath;
@ -527,7 +597,7 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const
break;
}
case PM_SOURCE_LINE_NODE:
pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line);
pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line);
break;
case PM_STRING_NODE: {
const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
@ -550,3 +620,19 @@ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const
break;
}
}
/**
* Create a string-based representation of the given static literal.
*/
PRISM_EXPORTED_FUNCTION void
pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
pm_static_literal_inspect_node(
buffer,
&(pm_static_literals_metadata_t) {
.newline_list = newline_list,
.start_line = start_line,
.encoding_name = encoding_name
},
node
);
}

Просмотреть файл

@ -8,8 +8,7 @@
#include "prism/defines.h"
#include "prism/ast.h"
#include "prism/node.h"
#include "prism/parser.h"
#include "prism/util/pm_newline_list.h"
#include <assert.h>
#include <stdbool.h>
@ -92,12 +91,13 @@ typedef struct {
/**
* Add a node to the set of static literals.
*
* @param parser The parser that created the node.
* @param newline_list The list of newline offsets to use to calculate lines.
* @param start_line The line number that the parser starts on.
* @param literals The set of static literals to add the node to.
* @param node The node to add to the set.
* @return A pointer to the node that is being overwritten, if there is one.
*/
pm_node_t * pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node);
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node);
/**
* Free the internal memory associated with the given static literals set.
@ -110,9 +110,11 @@ void pm_static_literals_free(pm_static_literals_t *literals);
* Create a string-based representation of the given static literal.
*
* @param buffer The buffer to write the string to.
* @param parser The parser that created the node.
* @param newline_list The list of newline offsets to use to calculate lines.
* @param start_line The line number that the parser starts on.
* @param encoding_name The name of the encoding of the source being parsed.
* @param node The node to create a string representation of.
*/
PRISM_EXPORTED_FUNCTION void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
PRISM_EXPORTED_FUNCTION void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
#endif