[ruby/yarp] fix: handling escaped whitespace in a %w list

Introduces a new flavor of unescaping, YP_UNESCAPE_WHITESPACE, which
is the same as MINIMAL but also unescapes whitespace.

Note that a spanning_heredoc.txt fixture test is updated to be less
wrong, but YARP's behavior doesn't yet fully match Ruby in this case.

Fixes https://github.com/ruby/yarp/pull/1505

https://github.com/ruby/yarp/commit/0af69bdeb1
This commit is contained in:
Mike Dalessio 2023-09-15 21:17:37 -04:00 коммит произвёл git
Родитель ffc1fc7a6d
Коммит 0cda3ac454
8 изменённых файлов: 45 добавлений и 8 удалений

Просмотреть файл

@ -179,7 +179,7 @@
│ │ │ │ ├── opening_loc: ∅
│ │ │ │ ├── content_loc: (532...535) = "j\\\n"
│ │ │ │ ├── closing_loc: ∅
│ │ │ │ └── unescaped: "j\\\n"
│ │ │ │ └── unescaped: "j\n"
│ │ │ └── @ StringNode (location: (539...540))
│ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: ∅

Просмотреть файл

@ -292,7 +292,7 @@
│ │ │ ├── opening_loc: ∅
│ │ │ ├── content_loc: (290...298) = "foo\\ bar"
│ │ │ ├── closing_loc: ∅
│ │ │ └── unescaped: "foo\\ bar"
│ │ │ └── unescaped: "foo bar"
│ │ └── @ StringNode (location: (299...304))
│ │ ├── flags: ∅
│ │ ├── opening_loc: ∅
@ -308,7 +308,7 @@
│ │ │ ├── opening_loc: ∅
│ │ │ ├── content_loc: (310...318) = "foo\\ bar"
│ │ │ ├── closing_loc: ∅
│ │ │ └── unescaped: "foo\\ bar"
│ │ │ └── unescaped: "foo bar"
│ │ └── @ StringNode (location: (319...322))
│ │ ├── flags: ∅
│ │ ├── opening_loc: ∅

Просмотреть файл

@ -67,7 +67,7 @@
│ │ ├── opening_loc: ∅
│ │ ├── content_loc: (81...85) = "a\\\nb"
│ │ ├── closing_loc: ∅
│ │ └── unescaped: "a\\\nb"
│ │ └── unescaped: "a\nb"
│ ├── opening_loc: (78...81) = "%w{"
│ └── closing_loc: (85...86) = "}"
├── @ XStringNode (location: (88...96))

Просмотреть файл

@ -136,6 +136,13 @@ module YARP
assert_unescape_all("g", "\\g")
end
def test_whitespace_escaping_string_list
assert_equal("a b", Debug.unescape_whitespace("a\\ b"))
assert_equal("a\tb", Debug.unescape_whitespace("a\\\tb"))
assert_equal("a\nb", Debug.unescape_whitespace("a\\\nb"))
assert_equal("a\nb", Debug.unescape_whitespace("a\\\r\nb"))
end
private
def unescape_all(source)

Просмотреть файл

@ -491,6 +491,12 @@ unescape_minimal(VALUE self, VALUE source) {
return unescape(source, YP_UNESCAPE_MINIMAL);
}
// Escape the given string minimally plus whitespace. Returns the unescaped string.
static VALUE
unescape_whitespace(VALUE self, VALUE source) {
return unescape(source, YP_UNESCAPE_WHITESPACE);
}
// Unescape everything in the given string. Return the unescaped string.
static VALUE
unescape_all(VALUE self, VALUE source) {
@ -608,6 +614,7 @@ Init_yarp(void) {
rb_define_singleton_method(rb_cYARPDebug, "named_captures", named_captures, 1);
rb_define_singleton_method(rb_cYARPDebug, "unescape_none", unescape_none, 1);
rb_define_singleton_method(rb_cYARPDebug, "unescape_minimal", unescape_minimal, 1);
rb_define_singleton_method(rb_cYARPDebug, "unescape_whitespace", unescape_whitespace, 1);
rb_define_singleton_method(rb_cYARPDebug, "unescape_all", unescape_all, 1);
rb_define_singleton_method(rb_cYARPDebug, "memsize", memsize, 1);
rb_define_singleton_method(rb_cYARPDebug, "profile_file", profile_file, 1);

Просмотреть файл

@ -509,7 +509,17 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
cursor = backslash + 2;
break;
default:
if (unescape_type == YP_UNESCAPE_MINIMAL) {
if (unescape_type == YP_UNESCAPE_WHITESPACE) {
if (backslash[1] == '\r' && backslash[2] == '\n') {
cursor = backslash + 2;
break;
}
if (yp_strspn_whitespace(backslash + 1, 1)) {
cursor = backslash + 1;
break;
}
}
if (unescape_type == YP_UNESCAPE_WHITESPACE || unescape_type == YP_UNESCAPE_MINIMAL) {
// In this case we're escaping something that doesn't need escaping.
dest[dest_length++] = '\\';
cursor = backslash + 1;
@ -579,7 +589,16 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash,
case '\'':
return 2;
default: {
if (unescape_type == YP_UNESCAPE_MINIMAL) {
if (unescape_type == YP_UNESCAPE_WHITESPACE) {
if (backslash[1] == '\r' && backslash[2] == '\n') {
return 2;
}
size_t whitespace = yp_strspn_whitespace(backslash + 1, 1);
if (whitespace > 0) {
return whitespace;
}
}
if (unescape_type == YP_UNESCAPE_WHITESPACE || unescape_type == YP_UNESCAPE_MINIMAL) {
return 1 + yp_char_width(parser, backslash + 1, parser->end);
}

Просмотреть файл

@ -24,9 +24,13 @@ typedef enum {
// single quotes and backslashes.
YP_UNESCAPE_MINIMAL,
// When we're unescaping a string list, in addition to MINIMAL, we need to
// unescape whitespace.
YP_UNESCAPE_WHITESPACE,
// When we're unescaping a double-quoted string, we need to unescape all
// escapes.
YP_UNESCAPE_ALL
YP_UNESCAPE_ALL,
} yp_unescape_type_t;
// Unescape the contents of the given token into the given string using the given unescape mode.

Просмотреть файл

@ -12987,7 +12987,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
yp_token_t opening = not_provided(parser);
yp_token_t closing = not_provided(parser);
yp_node_t *string = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_MINIMAL);
yp_node_t *string = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_WHITESPACE);
yp_array_node_elements_append(array, string);
}