[ruby/yarp] Fix heredocs inside %W and %w lists

The problem was that we were treating heredoc bodies as part of the %W
list because we didn't advance the scanning cursor past the heredoc body
after lexing the heredoc. To fix this, we changed the whitespace
scanning function to stop scanning when it reaches a newline, but only
in the case that a heredoc is present.

Additionally, we need to prevent double counting newlines in the case of
a heredoc.  For example:

```ruby
%W(<<foo 123)
foo
```

The newline after the `)` is counted as part of scanning the heredoc, so
we added logic to prevent double counting the newline when scanning the
rest of the %W list.

https://github.com/ruby/yarp/commit/eb090d8126

Co-authored-by: Jemma Issroff <jemmaissroff@gmail.com>
This commit is contained in:
Aaron Patterson 2023-07-18 14:37:26 -07:00 коммит произвёл git
Родитель 5c219c1b7f
Коммит abce8583e2
6 изменённых файлов: 53 добавлений и 7 удалений

Просмотреть файл

@ -0,0 +1,28 @@
ProgramNode(0...30)(
[],
StatementsNode(0...30)(
[ArrayNode(0...30)(
[StringNode(4...5)(nil, (4...5), nil, "1"),
InterpolatedStringNode(0...12)(
nil,
[EmbeddedStatementsNode(6...12)(
(6...8),
StatementsNode(8...19)(
[InterpolatedStringNode(8...19)(
(8...11),
[StringNode(15...17)(nil, (15...17), nil, "2\n")],
(17...19)
)]
),
(11...12)
)],
nil
),
StringNode(13...14)(nil, (13...14), nil, "3"),
StringNode(25...26)(nil, (25...26), nil, "4"),
StringNode(27...28)(nil, (27...28), nil, "5")],
(0...3),
(29...30)
)]
)
)

Просмотреть файл

@ -28,7 +28,6 @@ class ParseTest < Test::Unit::TestCase
known_failures = %w[ known_failures = %w[
seattlerb/heredoc_nested.txt seattlerb/heredoc_nested.txt
seattlerb/pct_w_heredoc_interp_nested.txt
] ]
def find_source_file_node(node) def find_source_file_node(node)

Просмотреть файл

@ -75,7 +75,7 @@ yp_strspn_whitespace(const char *string, ptrdiff_t length) {
// whitespace while also tracking the location of each newline. Disallows // whitespace while also tracking the location of each newline. Disallows
// searching past the given maximum number of characters. // searching past the given maximum number of characters.
size_t size_t
yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list) { yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list, bool stop_at_newline) {
if (length <= 0) return 0; if (length <= 0) return 0;
size_t size = 0; size_t size = 0;
@ -83,7 +83,12 @@ yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t
while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) { while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) {
if (string[size] == '\n') { if (string[size] == '\n') {
yp_newline_list_append(newline_list, string + size); if (stop_at_newline) {
return size + 1;
}
else {
yp_newline_list_append(newline_list, string + size);
}
} }
size++; size++;

Просмотреть файл

@ -15,7 +15,7 @@ size_t yp_strspn_whitespace(const char *string, ptrdiff_t length);
// whitespace while also tracking the location of each newline. Disallows // whitespace while also tracking the location of each newline. Disallows
// searching past the given maximum number of characters. // searching past the given maximum number of characters.
size_t size_t
yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list); yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list, bool);
// Returns the number of characters at the start of the string that are inline // Returns the number of characters at the start of the string that are inline
// whitespace. Disallows searching past the given maximum number of characters. // whitespace. Disallows searching past the given maximum number of characters.

Просмотреть файл

@ -25,13 +25,15 @@ yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity
bool bool
yp_newline_list_append(yp_newline_list_t *list, const char *cursor) { yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
if (list->size == list->capacity) { if (list->size == list->capacity) {
list->capacity = list->capacity * 3 / 2; list->capacity = (list->capacity * 3) / 2;
list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t)); list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t));
if (list->offsets == NULL) return false; if (list->offsets == NULL) return false;
} }
assert(cursor >= list->start); assert(cursor >= list->start);
list->offsets[list->size++] = (size_t) (cursor - list->start + 1); size_t newline_offset = (size_t) (cursor - list->start + 1);
assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
list->offsets[list->size++] = newline_offset;
return true; return true;
} }

Просмотреть файл

@ -6505,14 +6505,26 @@ parser_lex(yp_parser_t *parser) {
} }
} }
case YP_LEX_LIST: case YP_LEX_LIST:
if (parser->next_start != NULL) {
parser->current.end = parser->next_start;
parser->next_start = NULL;
}
// First we'll set the beginning of the token. // First we'll set the beginning of the token.
parser->current.start = parser->current.end; parser->current.start = parser->current.end;
// If there's any whitespace at the start of the list, then we're // If there's any whitespace at the start of the list, then we're
// going to trim it off the beginning and create a new token. // going to trim it off the beginning and create a new token.
size_t whitespace; size_t whitespace;
if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list)) > 0) {
bool should_stop = parser->heredoc_end;
if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) {
parser->current.end += whitespace; parser->current.end += whitespace;
if (parser->current.end[-1] == '\n') {
// mutates next_start
parser_flush_heredoc_end(parser);
}
LEX(YP_TOKEN_WORDS_SEP); LEX(YP_TOKEN_WORDS_SEP);
} }