зеркало из https://github.com/github/ruby.git
[ruby/yarp] fix: octal, hex, and unicode strings at the end of a file (https://github.com/ruby/yarp/pull/1371)

* refactor: move EOF check into yp_unescape_calculate_difference

  parser_lex is a bit more readable when we can rely on that behavior

* fix: octal and hex digits at the end of a file

  Previously this resulted in invalid memory access.

* fix: unicode strings at the end of a file

  Previously this resulted in invalid memory access.

* Unterminated curly-bracket unicode is a syntax error

https://github.com/ruby/yarp/commit/21cf11acb5
This commit is contained in:
Родитель
9930363aab
Коммит
df4c77608e
|
@ -621,6 +621,13 @@ module YARP
|
|||
]
|
||||
end
|
||||
|
||||
def test_unterminated_unicode_brackets_should_be_a_syntax_error
|
||||
assert_errors expression('?\\u{3'), '?\\u{3', [
|
||||
["invalid Unicode escape.", 1..5],
|
||||
["invalid Unicode escape.", 1..5],
|
||||
]
|
||||
end
|
||||
|
||||
def test_method_parameters_after_block
|
||||
expected = DefNode(
|
||||
Location(),
|
||||
|
|
|
@ -22,6 +22,19 @@ module YARP
|
|||
snippet "incomplete escaped list", "%w[\\"
|
||||
snippet "incomplete escaped regex", "/a\\"
|
||||
snippet "unterminated heredoc with unterminated escape at end of file", "<<A\n\\"
|
||||
snippet "escaped octal at end of file 1", '"\\3'
|
||||
snippet "escaped octal at end of file 2", '"\\33'
|
||||
snippet "escaped hex at end of file 1", '"\\x'
|
||||
snippet "escaped hex at end of file 2", '"\\x3'
|
||||
snippet "escaped unicode at end of file 1", '"\\u{3'
|
||||
snippet "escaped unicode at end of file 2", '"\\u{33'
|
||||
snippet "escaped unicode at end of file 3", '"\\u{333'
|
||||
snippet "escaped unicode at end of file 4", '"\\u{3333'
|
||||
snippet "escaped unicode at end of file 5", '"\\u{33333'
|
||||
snippet "escaped unicode at end of file 6", '"\\u{333333'
|
||||
snippet "escaped unicode at end of file 7", '"\\u3'
|
||||
snippet "escaped unicode at end of file 8", '"\\u33'
|
||||
snippet "escaped unicode at end of file 9", '"\\u333'
|
||||
|
||||
snippet "statements node with multiple heredocs", <<~EOF
|
||||
for <<A + <<B
|
||||
|
|
|
@ -69,17 +69,15 @@ char_is_ascii_printable(const uint8_t b) {
|
|||
// Scan the 1-3 digits of octal into the value. Returns the number of digits
|
||||
// scanned.
|
||||
static inline size_t
|
||||
unescape_octal(const uint8_t *backslash, uint8_t *value) {
|
||||
unescape_octal(const uint8_t *backslash, uint8_t *value, const uint8_t *end) {
|
||||
*value = (uint8_t) (backslash[1] - '0');
|
||||
if (!yp_char_is_octal_digit(backslash[2])) {
|
||||
if (backslash + 2 >= end || !yp_char_is_octal_digit(backslash[2])) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
*value = (uint8_t) ((*value << 3) | (backslash[2] - '0'));
|
||||
if (!yp_char_is_octal_digit(backslash[3])) {
|
||||
if (backslash + 3 >= end || !yp_char_is_octal_digit(backslash[3])) {
|
||||
return 3;
|
||||
}
|
||||
|
||||
*value = (uint8_t) ((*value << 3) | (backslash[3] - '0'));
|
||||
return 4;
|
||||
}
|
||||
|
@ -93,12 +91,15 @@ unescape_hexadecimal_digit(const uint8_t value) {
|
|||
// Scan the 1-2 digits of hexadecimal into the value. Returns the number of
|
||||
// digits scanned.
|
||||
static inline size_t
|
||||
unescape_hexadecimal(const uint8_t *backslash, uint8_t *value) {
|
||||
unescape_hexadecimal(const uint8_t *backslash, uint8_t *value, const uint8_t *end) {
|
||||
*value = 0;
|
||||
if (backslash + 2 >= end || !yp_char_is_hexadecimal_digit(backslash[2])) {
|
||||
return 2;
|
||||
}
|
||||
*value = unescape_hexadecimal_digit(backslash[2]);
|
||||
if (!yp_char_is_hexadecimal_digit(backslash[3])) {
|
||||
if (backslash + 3 >= end || !yp_char_is_hexadecimal_digit(backslash[3])) {
|
||||
return 3;
|
||||
}
|
||||
|
||||
*value = (uint8_t) ((*value << 4) | unescape_hexadecimal_digit(backslash[3]));
|
||||
return 4;
|
||||
}
|
||||
|
@ -204,7 +205,7 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
|
|||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9': {
|
||||
uint8_t value;
|
||||
const uint8_t *cursor = backslash + unescape_octal(backslash, &value);
|
||||
const uint8_t *cursor = backslash + unescape_octal(backslash, &value, end);
|
||||
|
||||
if (dest) {
|
||||
dest[(*dest_length)++] = unescape_char(value, flags);
|
||||
|
@ -214,7 +215,7 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
|
|||
// \xnn hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
|
||||
case 'x': {
|
||||
uint8_t value;
|
||||
const uint8_t *cursor = backslash + unescape_hexadecimal(backslash, &value);
|
||||
const uint8_t *cursor = backslash + unescape_hexadecimal(backslash, &value, end);
|
||||
|
||||
if (dest) {
|
||||
dest[(*dest_length)++] = unescape_char(value, flags);
|
||||
|
@ -236,13 +237,14 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
|
|||
|
||||
unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
|
||||
|
||||
while ((*unicode_cursor != '}') && (unicode_cursor < end)) {
|
||||
while ((unicode_cursor < end) && (*unicode_cursor != '}')) {
|
||||
const uint8_t *unicode_start = unicode_cursor;
|
||||
size_t hexadecimal_length = yp_strspn_hexadecimal_digit(unicode_cursor, end - unicode_cursor);
|
||||
|
||||
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
|
||||
if (hexadecimal_length > 6)
|
||||
if (hexadecimal_length > 6) {
|
||||
yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
|
||||
}
|
||||
|
||||
// there are not hexadecimal characters
|
||||
if (hexadecimal_length == 0) {
|
||||
|
@ -269,10 +271,16 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
|
|||
if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1)
|
||||
yp_diagnostic_list_append(&parser->error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
|
||||
|
||||
return unicode_cursor + 1;
|
||||
|
||||
if (unicode_cursor < end && *unicode_cursor == '}') {
|
||||
unicode_cursor++;
|
||||
} else {
|
||||
yp_diagnostic_list_append(&parser->error_list, backslash, unicode_cursor, "invalid Unicode escape.");
|
||||
}
|
||||
return unicode_cursor;
|
||||
}
|
||||
|
||||
if ((backslash + 2) < end && yp_char_is_hexadecimal_digits(backslash + 2, 4)) {
|
||||
if ((backslash + 5) < end && yp_char_is_hexadecimal_digits(backslash + 2, 4)) {
|
||||
uint32_t value;
|
||||
unescape_unicode(backslash + 2, 4, &value);
|
||||
|
||||
|
@ -538,6 +546,10 @@ size_t
|
|||
yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
|
||||
assert(unescape_type != YP_UNESCAPE_NONE);
|
||||
|
||||
if (backslash + 1 >= parser->end) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (backslash[1]) {
|
||||
case '\\':
|
||||
case '\'':
|
||||
|
|
44
yarp/yarp.c
44
yarp/yarp.c
|
@ -7002,17 +7002,16 @@ parser_lex(yp_parser_t *parser) {
|
|||
// literally. In this case we'll skip past the next character
|
||||
// and find the next breakpoint.
|
||||
if (*breakpoint == '\\') {
|
||||
// Check that we're not at the end of the file.
|
||||
if (breakpoint + 1 >= parser->end) {
|
||||
yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
||||
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
||||
if (difference == 0) {
|
||||
// we're at the end of the file
|
||||
breakpoint = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
||||
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
||||
|
||||
// If the result is an escaped newline ...
|
||||
if (*(breakpoint + difference - 1) == '\n') {
|
||||
if (breakpoint[difference - 1] == '\n') {
|
||||
if (parser->heredoc_end) {
|
||||
// ... if we are on the same line as a heredoc, flush the heredoc and
|
||||
// continue parsing after heredoc_end.
|
||||
|
@ -7141,16 +7140,15 @@ parser_lex(yp_parser_t *parser) {
|
|||
// literally. In this case we'll skip past the next character
|
||||
// and find the next breakpoint.
|
||||
if (*breakpoint == '\\') {
|
||||
// Check that we're not at the end of the file.
|
||||
if (breakpoint + 1 >= parser->end) {
|
||||
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
|
||||
if (difference == 0) {
|
||||
// we're at the end of the file
|
||||
breakpoint = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
|
||||
|
||||
// If the result is an escaped newline ...
|
||||
if (*(breakpoint + difference - 1) == '\n') {
|
||||
if (breakpoint[difference - 1] == '\n') {
|
||||
if (parser->heredoc_end) {
|
||||
// ... if we are on the same line as a heredoc, flush the heredoc and
|
||||
// continue parsing after heredoc_end.
|
||||
|
@ -7293,20 +7291,19 @@ parser_lex(yp_parser_t *parser) {
|
|||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
break;
|
||||
case '\\': {
|
||||
// Check that we're not at the end of the file.
|
||||
if (breakpoint + 1 >= parser->end) {
|
||||
breakpoint = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
// If we hit escapes, then we need to treat the next token
|
||||
// literally. In this case we'll skip past the next character and
|
||||
// find the next breakpoint.
|
||||
yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
||||
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
||||
if (difference == 0) {
|
||||
// we're at the end of the file
|
||||
breakpoint = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
// If the result is an escaped newline ...
|
||||
if (*(breakpoint + difference - 1) == '\n') {
|
||||
if (breakpoint[difference - 1] == '\n') {
|
||||
if (parser->heredoc_end) {
|
||||
// ... if we are on the same line as a heredoc, flush the heredoc and
|
||||
// continue parsing after heredoc_end.
|
||||
|
@ -7463,12 +7460,6 @@ parser_lex(yp_parser_t *parser) {
|
|||
break;
|
||||
}
|
||||
case '\\': {
|
||||
// Check that we're not at the end of the file.
|
||||
if (breakpoint + 1 >= parser->end) {
|
||||
breakpoint = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
// If we hit an escape, then we need to skip past
|
||||
// however many characters the escape takes up. However
|
||||
// it's important that if \n or \r\n are escaped that we
|
||||
|
@ -7481,6 +7472,11 @@ parser_lex(yp_parser_t *parser) {
|
|||
} else {
|
||||
yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
|
||||
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
||||
if (difference == 0) {
|
||||
// we're at the end of the file
|
||||
breakpoint = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче