[ruby/prism] Track both the unescaped bytes and source string for a regular expression so we can accurately set its encoding flags.

https://github.com/ruby/prism/commit/dc6dd3a926
This commit is contained in:
Kevin Menard 2024-02-08 16:27:59 -05:00 коммит произвёл git
Родитель 2d80b6093f
Коммит 82fb6a90d5
31 изменённых файлов: 259 добавлений и 126 удалений

Просмотреть файл

@ -248,7 +248,7 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
/**
* This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
* can compare against it because invalid multibyte characters are not a thing
* in this encoding.
* in this encoding. It is also needed for handling Regexp encoding flags.
*/
#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])

Просмотреть файл

@ -663,6 +663,17 @@ struct pm_parser {
*/
pm_string_t current_string;
/**
* This string is used to pass information from the lexer to the parser. When
* processing regular expressions we must track the string source for the expression
* as well as its unescaped representation. In that case, `current_string` will hold
* the unescaped value while this field will hold the translated source value. There
* are some escape sequences in regular expressions that will cause the associated
* source string to have a different value than the content of the expression so we
* must track this state separately.
*/
pm_string_t current_regular_expression_source;
/**
* The line number at the start of the parse. This will be used to offset
* the line numbers of all of the locations.

Просмотреть файл

@ -5949,6 +5949,34 @@ parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
return 0;
}
/**
 * Determine which forced-encoding flag (if any) should be set on a regular
 * expression node.
 *
 * Ruby "downgrades" a Regexp to US-ASCII when its encoding is ASCII-compatible
 * and the unescaped representation of its source consists only of US-ASCII
 * code points. This happens even when ASCII-8BIT was explicitly requested via
 * the (/n) modifier. Only when the content is not ASCII-only does an
 * explicitly set encoding (for example, one set through an escape sequence)
 * come into play.
 *
 * @param parser The parser, consulted for its `explicit_encoding` and
 *     `encoding` fields.
 * @param contents The unescaped content of the regular expression.
 * @return One of the PM_REGULAR_EXPRESSION_FLAGS_FORCED_* flags, or 0 when no
 *     encoding needs to be forced.
 */
static inline pm_node_flags_t
parse_regular_expression_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
    // Every Ruby source file must use an ASCII-compatible encoding, so any
    // regular expression literal whose content is ASCII-only is downgraded to
    // US-ASCII, regardless of any explicitly set encoding.
    if (pm_ascii_only_p(contents)) {
        return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
    }

    // Without an explicit encoding (set through a character escape sequence
    // in the source string or through a modifier) there is nothing to force.
    if (parser->explicit_encoding == NULL) {
        return 0;
    }

    if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
        return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
    }

    // Note that this compares the parser's source encoding, not the explicit
    // encoding, against US-ASCII.
    if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
        return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
    }

    return 0;
}
/**
* Allocate and initialize a new SymbolNode node with the given unescaped
* string.
@ -8130,34 +8158,34 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
* source so that the regular expression engine will perform its own unescaping.
*/
static inline void
escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
if (flags & PM_ESCAPE_FLAG_REGEXP) {
pm_buffer_append_bytes(buffer, (const uint8_t *) "\\x", 2);
pm_buffer_append_bytes(regular_expression_buffer, (const uint8_t *) "\\x", 2);
uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
uint8_t byte2 = (uint8_t) (byte & 0xF);
if (byte1 >= 0xA) {
pm_buffer_append_byte(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
} else {
pm_buffer_append_byte(buffer, (uint8_t) (byte1 + '0'));
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
}
if (byte2 >= 0xA) {
pm_buffer_append_byte(buffer, (uint8_t) (byte2 - 0xA + 'A'));
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
} else {
pm_buffer_append_byte(buffer, (uint8_t) (byte2 + '0'));
pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
}
} else {
escape_write_byte_encoded(parser, buffer, byte);
}
escape_write_byte_encoded(parser, buffer, byte);
}
/**
* Read the value of an escape into the buffer.
*/
static void
escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
switch (peek(parser)) {
case '\\': {
parser->current.end++;
@ -8248,10 +8276,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
if (flags & PM_ESCAPE_FLAG_REGEXP) {
pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
} else {
escape_write_byte_encoded(parser, buffer, value);
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
}
escape_write_byte_encoded(parser, buffer, value);
} else {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
}
@ -8272,10 +8300,9 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
uint32_t value = escape_unicode(parser->current.end, 4);
if (flags & PM_ESCAPE_FLAG_REGEXP) {
pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end + 4 - start));
} else {
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
}
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
parser->current.end += 4;
} else if (peek(parser) == '{') {
@ -8306,10 +8333,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
extra_codepoints_start = unicode_start;
}
if (!(flags & PM_ESCAPE_FLAG_REGEXP)) {
uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
}
uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
}
@ -8327,7 +8352,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
if (flags & PM_ESCAPE_FLAG_REGEXP) {
pm_buffer_append_bytes(buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
}
} else {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
@ -8346,7 +8371,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
switch (peeked) {
case '?': {
parser->current.end++;
escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
return;
}
case '\\':
@ -8355,7 +8380,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
return;
}
parser->current.end++;
escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
return;
default: {
if (!char_is_ascii_printable(peeked)) {
@ -8364,7 +8389,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
parser->current.end++;
escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
}
}
@ -8386,7 +8411,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
switch (peeked) {
case '?': {
parser->current.end++;
escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
return;
}
case '\\':
@ -8395,7 +8420,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
return;
}
parser->current.end++;
escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
return;
default: {
if (!char_is_ascii_printable(peeked)) {
@ -8404,7 +8429,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
parser->current.end++;
escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
}
}
@ -8429,7 +8454,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
return;
}
parser->current.end++;
escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_META);
escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
return;
}
@ -8439,7 +8464,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
parser->current.end++;
escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
return;
}
case '\r': {
@ -8510,7 +8535,7 @@ lex_question_mark(pm_parser_t *parser) {
pm_buffer_t buffer;
pm_buffer_init_capacity(&buffer, 3);
escape_read(parser, &buffer, PM_ESCAPE_FLAG_SINGLE);
escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
return PM_TOKEN_CHARACTER_LITERAL;
@ -8724,7 +8749,7 @@ parser_end_of_line_p(const pm_parser_t *parser) {
* "foo\n"
*
* then the bytes in the string are "f", "o", "o", "\", "n", but we want to
* provide out consumers with the string content "f", "o", "o", "\n". In these
* provide our consumers with the string content "f", "o", "o", "\n". In these
* cases, when we find the first escape sequence, we initialize a pm_buffer_t
* to keep track of the string content. Then in the parser, it will
* automatically attach the string content to the node that it belongs to.
@ -8736,6 +8761,20 @@ typedef struct {
*/
pm_buffer_t buffer;
/**
* In order to properly set a regular expression's encoding and to validate
* the byte sequence for the underlying encoding we must process any escape
* sequences. The unescaped byte sequence will be stored in `buffer` just like
* for other string-like types. However, we also need to store the regular
* expression's source string. That string may differ from what we see
* during lexing because some escape sequences rewrite the source.
*
* This value will only be initialized for regular expressions and only if we
* receive an escape sequence. It will contain the regular expression's source
* string's byte sequence.
*/
pm_buffer_t regular_expression_buffer;
/**
* The cursor into the source string that points to how far we have
* currently copied into the buffer.
@ -8751,19 +8790,29 @@ pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
pm_buffer_append_byte(&token_buffer->buffer, byte);
}
/**
 * Append the given byte to the token buffer's regular expression source
 * buffer only; the token buffer's main (unescaped) buffer is left untouched.
 */
static inline void
pm_token_buffer_push_byte_regular_expression(pm_token_buffer_t *token_buffer, uint8_t byte) {
    pm_buffer_t *source_buffer = &token_buffer->regular_expression_buffer;
    pm_buffer_append_byte(source_buffer, byte);
}
/**
* Append the given bytes into the token buffer.
*/
static inline void
pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length, uint8_t flags) {
pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
if (flags & PM_ESCAPE_FLAG_REGEXP) {
pm_buffer_append_bytes(&token_buffer->regular_expression_buffer, bytes, length);
}
}
/**
* Push an escaped character into the token buffer.
*/
static inline void
pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser, uint8_t flags) {
// First, determine the width of the character to be escaped.
size_t width;
if (parser->encoding_changed) {
@ -8777,7 +8826,7 @@ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parse
width = (width == 0 ? 1 : width);
// Now, push the bytes into the buffer.
pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
pm_token_buffer_push_bytes(token_buffer, parser->current.end, width, flags);
parser->current.end += width;
}
@ -8790,6 +8839,7 @@ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parse
static inline void
pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
    // Hand the unescaped bytes over to the parser as an owned string.
    pm_buffer_t *unescaped = &token_buffer->buffer;
    pm_string_owned_init(&parser->current_string, (uint8_t *) unescaped->value, unescaped->length);

    // Do the same for the regular expression source bytes so the parser can
    // read both representations.
    pm_buffer_t *source = &token_buffer->regular_expression_buffer;
    pm_string_owned_init(&parser->current_regular_expression_source, (uint8_t *) source->value, source->length);
}
/**
@ -8805,8 +8855,10 @@ static void
pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
    const uint8_t *cursor = token_buffer->cursor;

    if (cursor != NULL) {
        // The buffers are in use: copy the bytes between the cursor and the
        // end of the current token into both of them, then hand both over to
        // the parser.
        size_t remaining = (size_t) (parser->current.end - cursor);
        pm_buffer_append_bytes(&token_buffer->buffer, cursor, remaining);
        pm_buffer_append_bytes(&token_buffer->regular_expression_buffer, cursor, remaining);
        pm_token_buffer_copy(parser, token_buffer);
        return;
    }

    // The cursor was never set, so both strings can refer directly to the
    // current token's range in the source.
    pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
    pm_string_shared_init(&parser->current_regular_expression_source, parser->current.start, parser->current.end);
}
@ -8824,6 +8876,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
const uint8_t *start;
if (token_buffer->cursor == NULL) {
pm_buffer_init_capacity(&token_buffer->buffer, 16);
pm_buffer_init_capacity(&token_buffer->regular_expression_buffer, 16);
start = parser->current.start;
} else {
start = token_buffer->cursor;
@ -8831,6 +8884,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
const uint8_t *end = parser->current.end - 1;
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
pm_buffer_append_bytes(&token_buffer->regular_expression_buffer, start, (size_t) (end - start));
token_buffer->cursor = end;
}
@ -10143,7 +10197,7 @@ parser_lex(pm_parser_t *parser) {
// If we haven't found an escape yet, then this buffer will be
// unallocated since we can refer directly to the source string.
pm_token_buffer_t token_buffer = { { 0 }, 0 };
pm_token_buffer_t token_buffer = { { 0 }, { 0 }, 0 };
while (breakpoint != NULL) {
// If we hit a null byte, skip directly past it.
@ -10242,10 +10296,10 @@ parser_lex(pm_parser_t *parser) {
pm_token_buffer_push_byte(&token_buffer, peeked);
parser->current.end++;
} else if (lex_mode->as.list.interpolation) {
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
} else {
pm_token_buffer_push_byte(&token_buffer, '\\');
pm_token_buffer_push_escaped(&token_buffer, parser);
pm_token_buffer_push_escaped(&token_buffer, parser, PM_ESCAPE_FLAG_NONE);
}
break;
@ -10320,7 +10374,7 @@ parser_lex(pm_parser_t *parser) {
// characters.
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
pm_token_buffer_t token_buffer = { { 0 }, 0 };
pm_token_buffer_t token_buffer = { { 0 }, { 0 }, 0 };
while (breakpoint != NULL) {
// If we hit a null byte, skip directly past it.
@ -10403,9 +10457,10 @@ parser_lex(pm_parser_t *parser) {
parser->current.end++;
if (peek(parser) != '\n') {
if (lex_mode->as.regexp.terminator != '\r') {
pm_token_buffer_push_byte(&token_buffer, '\\');
pm_token_buffer_push_byte_regular_expression(&token_buffer, '\\');
}
pm_token_buffer_push_byte(&token_buffer, '\r');
pm_token_buffer_push_byte_regular_expression(&token_buffer, '\r');
break;
}
/* fallthrough */
@ -10429,7 +10484,7 @@ parser_lex(pm_parser_t *parser) {
case 'M':
case 'u':
case 'x':
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_REGEXP);
escape_read(parser, &token_buffer.buffer, &token_buffer.regular_expression_buffer, PM_ESCAPE_FLAG_REGEXP);
break;
default:
if (lex_mode->as.regexp.terminator == peeked) {
@ -10440,19 +10495,20 @@ parser_lex(pm_parser_t *parser) {
case '$': case ')': case '*': case '+':
case '.': case '>': case '?': case ']':
case '^': case '|': case '}':
pm_token_buffer_push_byte(&token_buffer, '\\');
pm_token_buffer_push_byte_regular_expression(&token_buffer, '\\');
break;
default:
break;
}
pm_token_buffer_push_byte(&token_buffer, peeked);
pm_token_buffer_push_byte_regular_expression(&token_buffer, peeked);
parser->current.end++;
break;
}
if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
pm_token_buffer_push_escaped(&token_buffer, parser);
if (peeked < 0x80) pm_token_buffer_push_byte_regular_expression(&token_buffer, '\\');
pm_token_buffer_push_escaped(&token_buffer, parser, PM_ESCAPE_FLAG_REGEXP);
break;
}
@ -10525,7 +10581,7 @@ parser_lex(pm_parser_t *parser) {
// If we haven't found an escape yet, then this buffer will be
// unallocated since we can refer directly to the source string.
pm_token_buffer_t token_buffer = { { 0 }, 0 };
pm_token_buffer_t token_buffer = { { 0 }, { 0 }, 0 };
while (breakpoint != NULL) {
// If we hit the incrementor, then we'll increment then nesting and
@ -10660,10 +10716,10 @@ parser_lex(pm_parser_t *parser) {
pm_token_buffer_push_byte(&token_buffer, peeked);
parser->current.end++;
} else if (lex_mode->as.string.interpolation) {
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
} else {
pm_token_buffer_push_byte(&token_buffer, '\\');
pm_token_buffer_push_escaped(&token_buffer, parser);
pm_token_buffer_push_escaped(&token_buffer, parser, PM_ESCAPE_FLAG_NONE);
}
break;
@ -10813,7 +10869,7 @@ parser_lex(pm_parser_t *parser) {
}
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
pm_token_buffer_t token_buffer = { { 0 }, 0 };
pm_token_buffer_t token_buffer = { { 0 }, { 0 }, 0 };
bool was_line_continuation = false;
while (breakpoint != NULL) {
@ -10935,7 +10991,7 @@ parser_lex(pm_parser_t *parser) {
continue;
default:
pm_token_buffer_push_byte(&token_buffer, '\\');
pm_token_buffer_push_escaped(&token_buffer, parser);
pm_token_buffer_push_escaped(&token_buffer, parser, PM_ESCAPE_FLAG_NONE);
break;
}
} else {
@ -10972,7 +11028,7 @@ parser_lex(pm_parser_t *parser) {
breakpoint = parser->current.end;
continue;
default:
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
break;
}
}
@ -16948,7 +17004,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
};
parser_lex(parser);
return (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
pm_node_t *regular_expression_node = (pm_node_t *) (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
pm_node_flag_set(regular_expression_node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
return regular_expression_node;
}
pm_interpolated_regular_expression_node_t *node;
@ -16959,6 +17019,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// following token is the end (in which case we can return a plain
// regular expression) or if it's not then it has interpolation.
pm_string_t unescaped = parser->current_string;
pm_string_t source = parser->current_regular_expression_source;
pm_token_t content = parser->current;
parser_lex(parser);
@ -16966,7 +17027,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// without interpolation, which can be represented more succinctly and
// more easily compiled.
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
return (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
pm_node_t *regular_expression_node = (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &source);
pm_node_flag_set(regular_expression_node, parse_regular_expression_encoding(parser, &unescaped));
return regular_expression_node;
}
// If we get here, then we have interpolation so we'll need to create
@ -18527,6 +18590,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.newline_list = { 0 },
.integer_base = 0,
.current_string = PM_STRING_EMPTY,
.current_regular_expression_source = PM_STRING_EMPTY,
.start_line = 1,
.explicit_encoding = NULL,
.command_line = 0,

Просмотреть файл

@ -149,6 +149,7 @@ module Prism
escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
escapes = escapes.concat(escapes.product(escapes).map(&:join))
symbols = [:a, :ą, :+]
regexps = [/a/, /ą/, //]
encodings.each_key do |encoding|
define_method(:"test_encoding_flags_#{encoding.name}") do
@ -168,6 +169,18 @@ module Prism
end
end
encodings.each_key do |encoding|
define_method(:"test_regular_expression_encoding_flags_#{encoding.name}") do
assert_regular_expression_encoding_flags(encoding, regexps.map(&:inspect))
end
end
encodings.each_key do |encoding|
define_method(:"test_regular_expression_escape_encoding_flags_#{encoding.name}") do
assert_regular_expression_encoding_flags(encoding, escapes.map { |e| "/#{e}/" })
end
end
def test_coding
result = Prism.parse("# coding: utf-8\n'string'")
actual = result.value.statements.body.first.unescaped.encoding
@ -454,5 +467,50 @@ module Prism
assert_equal expected, actual
end
end
# For each regular expression source in +regexps+, builds a program with an
# "# encoding:" magic comment naming +encoding+ and asserts that the encoding
# derived from Prism's forced-encoding flags equals what evaluating the same
# program with Ruby yields.
def assert_regular_expression_encoding_flags(encoding, regexps)
regexps.each do |regexp|
source = "# encoding: #{encoding.name}\n#{regexp}"
# The expected value comes from Ruby itself: the encoding of the evaluated
# Regexp, or, for the two recognized syntax errors, the part of the error
# message captured between ": " and the next newline.
expected =
begin
eval(source).encoding
rescue SyntaxError => error
if error.message.include?("UTF-8 character in non UTF-8 regexp") || error.message.include?("escaped non ASCII character in UTF-8 regexp")
error.message[/: (.+?)\n/, 1]
elsif error.message.include?("invalid multibyte char")
# TODO (nirvdrum 26-Jan-2024): Bail out early of the rest of the test due to https://github.com/ruby/prism/issues/2104.
next
else
raise
end
end
# The actual value comes from Prism: the forced-encoding flags on the first
# statement are mapped to an Encoding, falling back to the source encoding
# when no flag is set.
actual =
Prism.parse(source).then do |result|
if result.success?
regexp = result.value.statements.body.first
if regexp.forced_utf8_encoding?
Encoding::UTF_8
elsif regexp.forced_binary_encoding?
Encoding::ASCII_8BIT
elsif regexp.forced_us_ascii_encoding?
Encoding::US_ASCII
else
encoding
end
else
# A parse failure is tolerated only when the last error mentions
# "UTF-8 mixed within"; this branch then evaluates to nil. Any other
# error message is raised.
# NOTE(review): in that tolerated case `actual` is nil, so the assertion
# below passes only if `expected` is nil as well - confirm this is intended.
error = result.errors.last
unless error.message.include?("UTF-8 mixed within")
raise error.message
end
end
end
assert_equal expected, actual
end
end
end
end

Просмотреть файл

@ -21,7 +21,7 @@
│ │ ├── flags: ∅
│ │ └── arguments: (length: 2)
│ │ ├── @ RegularExpressionNode (location: (1,15)-(1,21))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (1,15)-(1,16) = "/"
│ │ │ ├── content_loc: (1,16)-(1,20) = "^\\s{"
│ │ │ ├── closing_loc: (1,20)-(1,21) = "/"
@ -52,7 +52,7 @@
│ ├── flags: ∅
│ └── arguments: (length: 2)
│ ├── @ RegularExpressionNode (location: (5,15)-(5,21))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (5,15)-(5,16) = "/"
│ │ ├── content_loc: (5,16)-(5,20) = "^\\s{"
│ │ ├── closing_loc: (5,20)-(5,21) = "/"

Просмотреть файл

@ -100,7 +100,7 @@
│ ├── closing_loc: (37,3)-(38,0) = "\n"
│ └── unescaped: "foo"
└── @ RegularExpressionNode (location: (39,0)-(41,0))
├── flags:
├── flags: forced_us_ascii_encoding
├── opening_loc: (39,0)-(40,0) = "%r\n"
├── content_loc: (40,0)-(40,3) = "foo"
├── closing_loc: (40,3)-(41,0) = "\n"

Просмотреть файл

@ -165,7 +165,7 @@
│ │ └── block: ∅
│ ├── pattern:
│ │ @ RegularExpressionNode (location: (9,7)-(9,12))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (9,7)-(9,8) = "/"
│ │ ├── content_loc: (9,8)-(9,11) = "foo"
│ │ ├── closing_loc: (9,11)-(9,12) = "/"
@ -719,14 +719,14 @@
│ │ ├── flags: ∅
│ │ ├── left:
│ │ │ @ RegularExpressionNode (location: (35,7)-(35,12))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (35,7)-(35,8) = "/"
│ │ │ ├── content_loc: (35,8)-(35,11) = "foo"
│ │ │ ├── closing_loc: (35,11)-(35,12) = "/"
│ │ │ └── unescaped: "foo"
│ │ ├── right:
│ │ │ @ RegularExpressionNode (location: (35,16)-(35,21))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (35,16)-(35,17) = "/"
│ │ │ ├── content_loc: (35,17)-(35,20) = "foo"
│ │ │ ├── closing_loc: (35,20)-(35,21) = "/"
@ -2543,7 +2543,7 @@
│ │ └── block: ∅
│ ├── pattern:
│ │ @ RegularExpressionNode (location: (112,7)-(112,12))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (112,7)-(112,8) = "/"
│ │ ├── content_loc: (112,8)-(112,11) = "foo"
│ │ ├── closing_loc: (112,11)-(112,12) = "/"
@ -3126,7 +3126,7 @@
│ │ └── @ InNode (location: (143,10)-(143,23))
│ │ ├── pattern:
│ │ │ @ RegularExpressionNode (location: (143,13)-(143,18))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (143,13)-(143,14) = "/"
│ │ │ ├── content_loc: (143,14)-(143,17) = "foo"
│ │ │ ├── closing_loc: (143,17)-(143,18) = "/"
@ -3914,7 +3914,7 @@
│ │ │ │ @ StatementsNode (location: (170,13)-(170,18))
│ │ │ │ └── body: (length: 1)
│ │ │ │ └── @ RegularExpressionNode (location: (170,13)-(170,18))
│ │ │ │ ├── flags:
│ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ │ ├── opening_loc: (170,13)-(170,14) = "/"
│ │ │ │ ├── content_loc: (170,14)-(170,17) = "foo"
│ │ │ │ ├── closing_loc: (170,17)-(170,18) = "/"

Просмотреть файл

@ -15,7 +15,7 @@
│ │ ├── flags: ∅
│ │ └── arguments: (length: 1)
│ │ └── @ RegularExpressionNode (location: (1,4)-(1,9))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,4)-(1,5) = "/"
│ │ ├── content_loc: (1,5)-(1,8) = "bar"
│ │ ├── closing_loc: (1,8)-(1,9) = "/"
@ -23,13 +23,13 @@
│ ├── closing_loc: ∅
│ └── block: ∅
├── @ RegularExpressionNode (location: (3,0)-(3,8))
│ ├── flags: ignore_case
│ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (3,0)-(3,3) = "%r{"
│ ├── content_loc: (3,3)-(3,6) = "abc"
│ ├── closing_loc: (3,6)-(3,8) = "}i"
│ └── unescaped: "abc"
├── @ RegularExpressionNode (location: (5,0)-(5,5))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (5,0)-(5,1) = "/"
│ ├── content_loc: (5,1)-(5,4) = "a\\b"
│ ├── closing_loc: (5,4)-(5,5) = "/"
@ -92,7 +92,7 @@
│ │ │ │ ├── flags: ∅
│ │ │ │ ├── receiver:
│ │ │ │ │ @ RegularExpressionNode (location: (11,1)-(11,14))
│ │ │ │ │ ├── flags:
│ │ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ │ │ ├── opening_loc: (11,1)-(11,2) = "/"
│ │ │ │ │ ├── content_loc: (11,2)-(11,13) = "(?<foo>bar)"
│ │ │ │ │ ├── closing_loc: (11,13)-(11,14) = "/"
@ -127,31 +127,31 @@
│ ├── opening_loc: (11,0)-(11,1) = "["
│ └── closing_loc: (11,26)-(11,27) = "]"
├── @ RegularExpressionNode (location: (13,0)-(13,6))
│ ├── flags: ignore_case
│ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (13,0)-(13,1) = "/"
│ ├── content_loc: (13,1)-(13,4) = "abc"
│ ├── closing_loc: (13,4)-(13,6) = "/i"
│ └── unescaped: "abc"
├── @ RegularExpressionNode (location: (15,0)-(15,26))
│ ├── flags: ignore_case
│ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (15,0)-(15,3) = "%r/"
│ ├── content_loc: (15,3)-(15,24) = "[a-z$._?][\\w$.?\#@~]*:"
│ ├── closing_loc: (15,24)-(15,26) = "/i"
│ └── unescaped: "[a-z$._?][\\w$.?\#@~]*:"
├── @ RegularExpressionNode (location: (17,0)-(17,37))
│ ├── flags: ignore_case
│ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (17,0)-(17,3) = "%r/"
│ ├── content_loc: (17,3)-(17,35) = "([a-z$._?][\\w$.?\#@~]*)(\\s+)(equ)"
│ ├── closing_loc: (17,35)-(17,37) = "/i"
│ └── unescaped: "([a-z$._?][\\w$.?\#@~]*)(\\s+)(equ)"
├── @ RegularExpressionNode (location: (19,0)-(19,25))
│ ├── flags: ignore_case
│ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (19,0)-(19,3) = "%r/"
│ ├── content_loc: (19,3)-(19,23) = "[a-z$._?][\\w$.?\#@~]*"
│ ├── closing_loc: (19,23)-(19,25) = "/i"
│ └── unescaped: "[a-z$._?][\\w$.?\#@~]*"
├── @ RegularExpressionNode (location: (21,0)-(24,1))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (21,0)-(21,3) = "%r("
│ ├── content_loc: (21,3)-(24,0) = "\n(?:[\\w\#$%_']|\\(\\)|\\(,\\)|\\[\\]|[0-9])*\n (?:[\\w\#$%_']+)\n"
│ ├── closing_loc: (24,0)-(24,1) = ")"
@ -160,7 +160,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ RegularExpressionNode (location: (26,0)-(26,8))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (26,0)-(26,1) = "/"
│ │ ├── content_loc: (26,1)-(26,7) = "(?#\\))"
│ │ ├── closing_loc: (26,7)-(26,8) = "/"
@ -182,7 +182,7 @@
│ ├── closing_loc: ∅
│ └── block: ∅
├── @ RegularExpressionNode (location: (28,0)-(28,9))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (28,0)-(28,3) = "%r#"
│ ├── content_loc: (28,3)-(28,8) = "pound"
│ ├── closing_loc: (28,8)-(28,9) = "#"
@ -220,7 +220,7 @@
│ │ ├── flags: ∅
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (32,0)-(33,4))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (32,0)-(32,1) = "/"
│ │ │ ├── content_loc: (32,1)-(33,3) = "(?<a\\\nb>)"
│ │ │ ├── closing_loc: (33,3)-(33,4) = "/"
@ -254,7 +254,7 @@
│ │ ├── flags: ∅
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (35,0)-(35,18))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (35,0)-(35,1) = "/"
│ │ │ ├── content_loc: (35,1)-(35,17) = "(?<abc>)(?<abc>)"
│ │ │ ├── closing_loc: (35,17)-(35,18) = "/"
@ -286,7 +286,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ RegularExpressionNode (location: (37,0)-(37,10))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (37,0)-(37,1) = "/"
│ │ ├── content_loc: (37,1)-(37,9) = "(?<a b>)"
│ │ ├── closing_loc: (37,9)-(37,10) = "/"
@ -338,7 +338,7 @@
│ │ ├── flags: ∅
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (40,6)-(40,14))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (40,6)-(40,7) = "/"
│ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)"
│ │ │ ├── closing_loc: (40,13)-(40,14) = "/"

Просмотреть файл

@ -70,7 +70,7 @@
│ ├── opening_loc: (26,0)-(26,3) = "%i["
│ └── closing_loc: (29,0)-(29,1) = "]"
├── @ RegularExpressionNode (location: (31,0)-(34,1))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (31,0)-(31,3) = "%r["
│ ├── content_loc: (31,3)-(34,0) = "\n\n\n"
│ ├── closing_loc: (34,0)-(34,1) = "]"

Просмотреть файл

@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,6))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,6))
├── flags:
├── flags: forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,3) = "%r'"
├── content_loc: (1,3)-(1,5) = "\\'"
├── closing_loc: (1,5)-(1,6) = "'"

Просмотреть файл

@ -16,7 +16,7 @@
│ ├── keyword_loc: (1,9)-(1,13) = "when"
│ ├── conditions: (length: 1)
│ │ └── @ RegularExpressionNode (location: (1,14)-(1,17))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,14)-(1,15) = "/"
│ │ ├── content_loc: (1,15)-(1,16) = "x"
│ │ ├── closing_loc: (1,16)-(1,17) = "/"

Просмотреть файл

@ -10,7 +10,7 @@
│ ├── keyword_loc: (1,6)-(1,10) = "when"
│ ├── conditions: (length: 1)
│ │ └── @ RegularExpressionNode (location: (1,11)-(1,23))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,11)-(1,14) = "%r%"
│ │ ├── content_loc: (1,14)-(1,22) = "blahblah"
│ │ ├── closing_loc: (1,22)-(1,23) = "%"

Просмотреть файл

@ -338,7 +338,7 @@
│ │ └── @ InNode (location: (46,0)-(46,11))
│ │ ├── pattern:
│ │ │ @ RegularExpressionNode (location: (46,3)-(46,11))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (46,3)-(46,4) = "/"
│ │ │ ├── content_loc: (46,4)-(46,10) = "regexp"
│ │ │ ├── closing_loc: (46,10)-(46,11) = "/"

Просмотреть файл

@ -4,31 +4,31 @@
@ StatementsNode (location: (1,0)-(9,13))
└── body: (length: 5)
├── @ RegularExpressionNode (location: (1,0)-(1,5))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (1,0)-(1,1) = "/"
│ ├── content_loc: (1,1)-(1,4) = "wtf"
│ ├── closing_loc: (1,4)-(1,5) = "/"
│ └── unescaped: "wtf"
├── @ RegularExpressionNode (location: (3,0)-(3,6))
│ ├── flags: multi_line
│ ├── flags: multi_line, forced_us_ascii_encoding
│ ├── opening_loc: (3,0)-(3,1) = "/"
│ ├── content_loc: (3,1)-(3,4) = "wtf"
│ ├── closing_loc: (3,4)-(3,6) = "/m"
│ └── unescaped: "wtf"
├── @ RegularExpressionNode (location: (5,0)-(5,6))
│ ├── flags: ascii_8bit
│ ├── flags: ascii_8bit, forced_us_ascii_encoding
│ ├── opening_loc: (5,0)-(5,1) = "/"
│ ├── content_loc: (5,1)-(5,4) = "wtf"
│ ├── closing_loc: (5,4)-(5,6) = "/n"
│ └── unescaped: "wtf"
├── @ RegularExpressionNode (location: (7,0)-(7,7))
│ ├── flags: multi_line, ascii_8bit
│ ├── flags: multi_line, ascii_8bit, forced_us_ascii_encoding
│ ├── opening_loc: (7,0)-(7,1) = "/"
│ ├── content_loc: (7,1)-(7,4) = "wtf"
│ ├── closing_loc: (7,4)-(7,7) = "/nm"
│ └── unescaped: "wtf"
└── @ RegularExpressionNode (location: (9,0)-(9,13))
├── flags: multi_line, ascii_8bit
├── flags: multi_line, ascii_8bit, forced_us_ascii_encoding
├── opening_loc: (9,0)-(9,1) = "/"
├── content_loc: (9,1)-(9,4) = "wtf"
├── closing_loc: (9,4)-(9,13) = "/nmnmnmnm"

Просмотреть файл

@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,7))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,7))
├── flags:
├── flags: forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,6) = "\\cC\\d"
├── closing_loc: (1,6)-(1,7) = "/"

Просмотреть файл

@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,17))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,17))
├── flags:
├── flags: forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,16) = "[\\u0021-\\u0027]"
├── closing_loc: (1,16)-(1,17) = "/"

Просмотреть файл

@ -4,13 +4,13 @@
@ StatementsNode (location: (1,0)-(3,8))
└── body: (length: 2)
├── @ RegularExpressionNode (location: (1,0)-(1,15))
│ ├── flags:
│ ├── flags: forced_utf8_encoding
│ ├── opening_loc: (1,0)-(1,1) = "/"
│ ├── content_loc: (1,1)-(1,14) = "\\u{c0de babe}"
│ ├── closing_loc: (1,14)-(1,15) = "/"
│ └── unescaped: "\\u{c0de babe}"
└── @ RegularExpressionNode (location: (3,0)-(3,8))
├── flags:
├── flags: forced_utf8_encoding
├── opening_loc: (3,0)-(3,1) = "/"
├── content_loc: (3,1)-(3,7) = "\\u{df}"
├── closing_loc: (3,7)-(3,8) = "/"

Просмотреть файл

@ -46,7 +46,7 @@
│ │ ├── flags: ∅
│ │ └── arguments: (length: 1)
│ │ └── @ RegularExpressionNode (location: (5,4)-(8,0))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (5,4)-(6,0) = "%r\n"
│ │ ├── content_loc: (6,0)-(6,0) = ""
│ │ ├── closing_loc: (7,0)-(8,0) = "\n"

Просмотреть файл

@ -15,7 +15,7 @@
│ ├── opening_loc: (1,0)-(1,1) = "["
│ └── closing_loc: (1,9)-(1,10) = "]"
├── @ RegularExpressionNode (location: (3,0)-(3,8))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (3,0)-(3,1) = "/"
│ ├── content_loc: (3,1)-(3,7) = "\\c\#{1}"
│ ├── closing_loc: (3,7)-(3,8) = "/"

Просмотреть файл

@ -7,7 +7,7 @@
│ ├── if_keyword_loc: (1,0)-(1,2) = "if"
│ ├── predicate:
│ │ @ MatchLastLineNode (location: (1,3)-(1,8))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,3)-(1,4) = "/"
│ │ ├── content_loc: (1,4)-(1,7) = "foo"
│ │ ├── closing_loc: (1,7)-(1,8) = "/"

Просмотреть файл

@ -566,13 +566,13 @@
│ ├── closing_loc: (48,2)-(48,3) = "\""
│ └── unescaped: ""
├── @ RegularExpressionNode (location: (49,0)-(49,5))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (49,0)-(49,1) = "/"
│ ├── content_loc: (49,1)-(49,4) = "foo"
│ ├── closing_loc: (49,4)-(49,5) = "/"
│ └── unescaped: "foo"
├── @ RegularExpressionNode (location: (50,0)-(50,28))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (50,0)-(50,1) = "/"
│ ├── content_loc: (50,1)-(50,27) = "[^-+',.\\/:@[:alnum:]\\[\\]]+"
│ ├── closing_loc: (50,27)-(50,28) = "/"
@ -633,25 +633,25 @@
│ │ └── closing_loc: (53,11)-(53,12) = "}"
│ └── closing_loc: (53,12)-(53,13) = "/"
├── @ RegularExpressionNode (location: (54,0)-(54,4))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (54,0)-(54,1) = "/"
│ ├── content_loc: (54,1)-(54,3) = "\\n"
│ ├── closing_loc: (54,3)-(54,4) = "/"
│ └── unescaped: "\\n"
├── @ RegularExpressionNode (location: (55,0)-(55,4))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (55,0)-(55,1) = "/"
│ ├── content_loc: (55,1)-(55,3) = "\\n"
│ ├── closing_loc: (55,3)-(55,4) = "/"
│ └── unescaped: "\\n"
├── @ RegularExpressionNode (location: (56,0)-(56,5))
│ ├── flags: extended
│ ├── flags: extended, forced_us_ascii_encoding
│ ├── opening_loc: (56,0)-(56,1) = "/"
│ ├── content_loc: (56,1)-(56,3) = "\\n"
│ ├── closing_loc: (56,3)-(56,5) = "/x"
│ └── unescaped: "\\n"
├── @ RegularExpressionNode (location: (57,0)-(57,7))
│ ├── flags: extended
│ ├── flags: extended, forced_us_ascii_encoding
│ ├── opening_loc: (57,0)-(57,1) = "/"
│ ├── content_loc: (57,1)-(57,5) = "\\/\\/"
│ ├── closing_loc: (57,5)-(57,7) = "/x"

Просмотреть файл

@ -425,7 +425,7 @@
│ │ │ ├── flags: ∅
│ │ │ ├── receiver:
│ │ │ │ @ RegularExpressionNode (location: (37,1)-(37,6))
│ │ │ │ ├── flags:
│ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ │ ├── opening_loc: (37,1)-(37,2) = "/"
│ │ │ │ ├── content_loc: (37,2)-(37,5) = "bar"
│ │ │ │ ├── closing_loc: (37,5)-(37,6) = "/"
@ -511,7 +511,7 @@
│ │ │ │ ├── flags: ∅
│ │ │ │ └── arguments: (length: 1)
│ │ │ │ └── @ RegularExpressionNode (location: (39,8)-(39,13))
│ │ │ │ ├── flags:
│ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ │ ├── opening_loc: (39,8)-(39,9) = "/"
│ │ │ │ ├── content_loc: (39,9)-(39,12) = "bar"
│ │ │ │ ├── closing_loc: (39,12)-(39,13) = "/"
@ -531,7 +531,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ RegularExpressionNode (location: (40,0)-(40,5))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (40,0)-(40,1) = "/"
│ │ ├── content_loc: (40,1)-(40,4) = "bar"
│ │ ├── closing_loc: (40,4)-(40,5) = "/"
@ -556,7 +556,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ RegularExpressionNode (location: (41,0)-(41,5))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (41,0)-(41,1) = "/"
│ │ ├── content_loc: (41,1)-(41,4) = "bar"
│ │ ├── closing_loc: (41,4)-(41,5) = "/"
@ -758,7 +758,7 @@
│ │ ├── flags: ∅
│ │ └── arguments: (length: 1)
│ │ └── @ RegularExpressionNode (location: (49,7)-(49,12))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (49,7)-(49,8) = "/"
│ │ ├── content_loc: (49,8)-(49,11) = "bar"
│ │ ├── closing_loc: (49,11)-(49,12) = "/"
@ -1007,7 +1007,7 @@
│ │ │ ├── flags: ∅
│ │ │ └── arguments: (length: 1)
│ │ │ └── @ RegularExpressionNode (location: (57,11)-(57,16))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (57,11)-(57,12) = "/"
│ │ │ ├── content_loc: (57,12)-(57,15) = "bar"
│ │ │ ├── closing_loc: (57,15)-(57,16) = "/"

Просмотреть файл

@ -31,13 +31,13 @@
│ ├── closing_loc: ∅
│ └── unescaped: "c"
├── @ RegularExpressionNode (location: (9,0)-(9,5))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (9,0)-(9,3) = "%r("
│ ├── content_loc: (9,3)-(9,4) = "/"
│ ├── closing_loc: (9,4)-(9,5) = ")"
│ └── unescaped: "/"
├── @ RegularExpressionNode (location: (10,0)-(10,6))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (10,0)-(10,3) = "%r("
│ ├── content_loc: (10,3)-(10,5) = "\\)"
│ ├── closing_loc: (10,5)-(10,6) = ")"

Просмотреть файл

@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,5))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,5))
├── flags: extended
├── flags: extended, forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,3) = "#)"
├── closing_loc: (1,3)-(1,5) = "/x"

Просмотреть файл

@ -7,7 +7,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ MatchLastLineNode (location: (1,1)-(1,6))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,1)-(1,2) = "/"
│ │ ├── content_loc: (1,2)-(1,5) = "wat"
│ │ ├── closing_loc: (1,5)-(1,6) = "/"
@ -23,7 +23,7 @@
├── if_keyword_loc: (3,0)-(3,2) = "if"
├── predicate:
│ @ MatchLastLineNode (location: (3,3)-(3,8))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (3,3)-(3,4) = "/"
│ ├── content_loc: (3,4)-(3,7) = "wat"
│ ├── closing_loc: (3,7)-(3,8) = "/"

Просмотреть файл

@ -106,13 +106,13 @@
│ ├── closing_loc: (23,8)-(23,9) = "}"
│ └── unescaped: "\#@@1"
├── @ RegularExpressionNode (location: (25,1)-(25,8))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (25,1)-(25,4) = "%r{"
│ ├── content_loc: (25,4)-(25,7) = "\#@1"
│ ├── closing_loc: (25,7)-(25,8) = "}"
│ └── unescaped: "\#@1"
├── @ RegularExpressionNode (location: (27,1)-(27,9))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (27,1)-(27,4) = "%r{"
│ ├── content_loc: (27,4)-(27,8) = "\#@@1"
│ ├── closing_loc: (27,8)-(27,9) = "}"
@ -188,13 +188,13 @@
│ ├── closing_loc: (47,6)-(47,7) = "'"
│ └── unescaped: "\#@@1"
├── @ RegularExpressionNode (location: (49,1)-(49,6))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (49,1)-(49,2) = "/"
│ ├── content_loc: (49,2)-(49,5) = "\#@1"
│ ├── closing_loc: (49,5)-(49,6) = "/"
│ └── unescaped: "\#@1"
├── @ RegularExpressionNode (location: (51,1)-(51,7))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (51,1)-(51,2) = "/"
│ ├── content_loc: (51,2)-(51,6) = "\#@@1"
│ ├── closing_loc: (51,6)-(51,7) = "/"

Просмотреть файл

@ -9,7 +9,7 @@
│ │ ├── flags: ∅
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (1,0)-(1,15))
│ │ │ ├── flags:
│ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (1,0)-(1,1) = "/"
│ │ │ ├── content_loc: (1,1)-(1,14) = "(?<match>bar)"
│ │ │ ├── closing_loc: (1,14)-(1,15) = "/"

Просмотреть файл

@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,4))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,4))
├── flags:
├── flags: forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,3) = "\\("
├── closing_loc: (1,3)-(1,4) = "/"

Просмотреть файл

@ -55,7 +55,7 @@
│ ├── closing_loc: (17,1)-(17,2) = "}"
│ └── unescaped: "a\\\nb"
├── @ RegularExpressionNode (location: (19,0)-(20,2))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (19,0)-(19,3) = "%r{"
│ ├── content_loc: (19,3)-(20,1) = "a\\\nb"
│ ├── closing_loc: (20,1)-(20,2) = "}"
@ -96,7 +96,7 @@
│ ├── closing_loc: (35,1)-(35,2) = "'"
│ └── unescaped: "a\\\nb"
├── @ RegularExpressionNode (location: (37,0)-(38,2))
│ ├── flags:
│ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (37,0)-(37,1) = "/"
│ ├── content_loc: (37,1)-(38,1) = "a\\\nb"
│ ├── closing_loc: (38,1)-(38,2) = "/"

Просмотреть файл

@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,10))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,10))
├── flags: ignore_case, multi_line
├── flags: ignore_case, multi_line, forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,7) = "source"
├── closing_loc: (1,7)-(1,10) = "/im"

Просмотреть файл

@ -112,7 +112,7 @@
│ │ │ ├── closing_loc: (3,7)-(3,8) = ")"
│ │ │ └── block: ∅
│ │ └── @ RegularExpressionNode (location: (3,10)-(3,13))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (3,10)-(3,11) = "/"
│ │ ├── content_loc: (3,11)-(3,12) = "x"
│ │ ├── closing_loc: (3,12)-(3,13) = "/"
@ -173,7 +173,7 @@
│ │ │ ├── closing_loc: (5,7)-(5,8) = ")"
│ │ │ └── block: ∅
│ │ └── @ RegularExpressionNode (location: (5,10)-(5,14))
│ │ ├── flags: multi_line
│ │ ├── flags: multi_line, forced_us_ascii_encoding
│ │ ├── opening_loc: (5,10)-(5,11) = "/"
│ │ ├── content_loc: (5,11)-(5,12) = "x"
│ │ ├── closing_loc: (5,12)-(5,14) = "/m"
@ -295,7 +295,7 @@
│ │ │ ├── closing_loc: (9,8)-(9,9) = ")"
│ │ │ └── block: ∅
│ │ └── @ RegularExpressionNode (location: (9,11)-(9,14))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (9,11)-(9,12) = "/"
│ │ ├── content_loc: (9,12)-(9,13) = "x"
│ │ ├── closing_loc: (9,13)-(9,14) = "/"
@ -356,7 +356,7 @@
│ │ │ ├── closing_loc: (11,8)-(11,9) = ")"
│ │ │ └── block: ∅
│ │ └── @ RegularExpressionNode (location: (11,11)-(11,15))
│ │ ├── flags: multi_line
│ │ ├── flags: multi_line, forced_us_ascii_encoding
│ │ ├── opening_loc: (11,11)-(11,12) = "/"
│ │ ├── content_loc: (11,12)-(11,13) = "x"
│ │ ├── closing_loc: (11,13)-(11,15) = "/m"
@ -488,7 +488,7 @@
│ │ │ ├── opening_loc: (15,3)-(15,4) = "{"
│ │ │ └── closing_loc: (15,7)-(15,8) = "}"
│ │ └── @ RegularExpressionNode (location: (15,10)-(15,13))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (15,10)-(15,11) = "/"
│ │ ├── content_loc: (15,11)-(15,12) = "x"
│ │ ├── closing_loc: (15,12)-(15,13) = "/"
@ -554,7 +554,7 @@
│ │ │ ├── opening_loc: (17,3)-(17,4) = "{"
│ │ │ └── closing_loc: (17,7)-(17,8) = "}"
│ │ └── @ RegularExpressionNode (location: (17,10)-(17,14))
│ │ ├── flags: multi_line
│ │ ├── flags: multi_line, forced_us_ascii_encoding
│ │ ├── opening_loc: (17,10)-(17,11) = "/"
│ │ ├── content_loc: (17,11)-(17,12) = "x"
│ │ ├── closing_loc: (17,12)-(17,14) = "/m"
@ -686,7 +686,7 @@
│ │ │ ├── opening_loc: (21,3)-(21,4) = "{"
│ │ │ └── closing_loc: (21,8)-(21,9) = "}"
│ │ └── @ RegularExpressionNode (location: (21,11)-(21,14))
│ │ ├── flags:
│ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (21,11)-(21,12) = "/"
│ │ ├── content_loc: (21,12)-(21,13) = "x"
│ │ ├── closing_loc: (21,13)-(21,14) = "/"
@ -752,7 +752,7 @@
│ │ ├── opening_loc: (23,3)-(23,4) = "{"
│ │ └── closing_loc: (23,8)-(23,9) = "}"
│ └── @ RegularExpressionNode (location: (23,11)-(23,15))
│ ├── flags: multi_line
│ ├── flags: multi_line, forced_us_ascii_encoding
│ ├── opening_loc: (23,11)-(23,12) = "/"
│ ├── content_loc: (23,12)-(23,13) = "x"
│ ├── closing_loc: (23,13)-(23,15) = "/m"