[ruby/yarp] Properly handle invalid underscores in number literals

https://github.com/ruby/yarp/commit/35da3d1a4c
This commit is contained in:
Kevin Newton 2023-09-15 10:40:18 -04:00 коммит произвёл git
Родитель b848700ccf
Коммит 18780c22f6
6 изменённых файлов: 145 добавлений и 28 удалений

Просмотреть файл

@ -1237,6 +1237,24 @@ module YARP
assert_errors expression(source), source, errors, compare_ripper: false
end
# Every listed literal contains either a doubled underscore or a trailing
# underscore, and each one is expected to yield the same single error message.
def test_invalid_number_underscores
  error_messages = ["Invalid underscore placement in number"]

  sources = %w[
    1__1 0b1__1 0o1__1 01__1 0d1__1 0x1__1
    1_1_ 0b1_1_ 0o1_1_ 01_1_ 0d1_1_ 0x1_1_
  ]

  sources.each do |source|
    assert_error_messages source, error_messages
  end
end
private
def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby")

Просмотреть файл

@ -164,6 +164,7 @@ static const char* const diagnostic_messages[YP_DIAGNOSTIC_ID_LEN] = {
[YP_ERR_INVALID_NUMBER_DECIMAL] = "Invalid decimal number",
[YP_ERR_INVALID_NUMBER_HEXADECIMAL] = "Invalid hexadecimal number",
[YP_ERR_INVALID_NUMBER_OCTAL] = "Invalid octal number",
[YP_ERR_INVALID_NUMBER_UNDERSCORE] = "Invalid underscore placement in number",
[YP_ERR_INVALID_PERCENT] = "Invalid `%` token", // TODO WHAT?
[YP_ERR_INVALID_TOKEN] = "Invalid token", // TODO WHAT?
[YP_ERR_INVALID_VARIABLE_GLOBAL] = "Invalid global variable",

Просмотреть файл

@ -130,6 +130,7 @@ typedef enum {
YP_ERR_INVALID_NUMBER_DECIMAL,
YP_ERR_INVALID_NUMBER_HEXADECIMAL,
YP_ERR_INVALID_NUMBER_OCTAL,
YP_ERR_INVALID_NUMBER_UNDERSCORE,
YP_ERR_INVALID_PERCENT,
YP_ERR_INVALID_TOKEN,
YP_ERR_INVALID_VARIABLE_GLOBAL,

Просмотреть файл

@ -123,6 +123,9 @@ yp_char_is_inline_whitespace(const uint8_t b) {
return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
}
// Scan through the string and return the number of characters at the start of
// the string that match the given kind. Disallows searching past the given
// maximum number of characters.
static inline size_t
yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
if (length <= 0) return 0;
@ -134,20 +137,57 @@ yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
return size;
}
// Scan through the string and return the number of characters at the start of
// the string whose yp_number_table entry has the given kind bit set. Never
// examines more than length characters.
//
// While scanning, an underscore that directly follows another underscore, or
// an underscore that is the final matched character, is treated as invalid.
// The location of the last such underscore is written through the invalid out
// parameter. When no invalid underscore is found *invalid is left untouched,
// so callers should initialize it (for example to NULL) before calling.
static inline size_t
yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
    if (length <= 0) return 0;

    size_t size = 0;
    size_t maximum = (size_t) length;

    // Tracks whether the most recently matched character was an underscore.
    bool underscore = false;
    while (size < maximum && (yp_number_table[string[size]] & kind)) {
        if (string[size] == '_') {
            // Two underscores in a row: the second one is invalid.
            if (underscore) *invalid = string + size;
            underscore = true;
        } else {
            underscore = false;
        }

        size++;
    }

    // A trailing underscore is invalid. The flag can only be set when at least
    // one character matched, so this never reads string[-1] when the very
    // first character fails to match the requested kind.
    if (underscore) *invalid = string + size - 1;
    return size;
}
// Returns the number of characters at the start of the string that are binary
// digits or underscores. Disallows searching past the given maximum number of
// characters.
//
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to point at
// the last invalid underscore.
size_t
yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length) {
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_BINARY_NUMBER);
yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_BINARY_NUMBER);
}
// Returns the number of characters at the start of the string that are octal
// digits or underscores. Disallows searching past the given maximum number of
// digits or underscores. Disallows searching past the given maximum number of
// characters.
//
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to point at
// the last invalid underscore.
size_t
yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length) {
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_OCTAL_NUMBER);
yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_OCTAL_NUMBER);
}
// Returns the number of characters at the start of the string that are decimal
@ -160,9 +200,13 @@ yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
// Returns the number of characters at the start of the string that are decimal
// digits or underscores. Disallows searching past the given maximum number of
// characters.
//
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to point at
// the last invalid underscore.
size_t
yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length) {
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_NUMBER);
yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_DECIMAL_NUMBER);
}
// Returns the number of characters at the start of the string that are
@ -176,9 +220,13 @@ yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
// Returns the number of characters at the start of the string that are
// hexadecimal digits or underscores. Disallows searching past the given maximum
// number of characters.
//
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to point at
// the last invalid underscore.
size_t
yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length) {
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
}
static inline bool

Просмотреть файл

@ -31,19 +31,31 @@ size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
// Returns the number of characters at the start of the string that are octal
// digits or underscores. Disallows searching past the given maximum number of
// digits or underscores. Disallows searching past the given maximum number of
// characters.
size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length);
//
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to point at
// the last invalid underscore.
size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns the number of characters at the start of the string that are decimal
// digits or underscores. Disallows searching past the given maximum number of
// characters.
size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length);
//
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to point at
// the last invalid underscore.
size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns the number of characters at the start of the string that are
// hexadecimal digits or underscores. Disallows searching past the given maximum
// number of characters.
size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length);
//
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to point at
// the last invalid underscore.
size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns the number of characters at the start of the string that are regexp
// options. Disallows searching past the given maximum number of characters.
@ -52,7 +64,11 @@ size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
// Returns the number of characters at the start of the string that are binary
// digits or underscores. Disallows searching past the given maximum number of
// characters.
size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length);
//
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to point at
// the last invalid underscore.
size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns true if the given character is a whitespace character.
bool yp_char_is_whitespace(const uint8_t b);

Просмотреть файл

@ -5330,6 +5330,45 @@ context_def_p(yp_parser_t *parser) {
/* Specific token lexers */
/******************************************************************************/
// Append an invalid-underscore error to the parser's error list when a number
// scan reported one. A NULL invalid pointer means nothing was flagged and no
// diagnostic is added; otherwise the diagnostic spans the single character at
// invalid.
static void
yp_strspn_number_validate(yp_parser_t *parser, const uint8_t *invalid) {
    if (invalid == NULL) return;

    const uint8_t *invalid_end = invalid + 1;
    yp_diagnostic_list_append(&parser->error_list, invalid, invalid_end, YP_ERR_INVALID_NUMBER_UNDERSCORE);
}
// Scan binary digits and underscores starting at string, bounded by
// parser->end, and record an error on the parser for any invalid underscore
// the scan reports. Returns the number of characters scanned.
static size_t
yp_strspn_binary_number_validate(yp_parser_t *parser, const uint8_t *string) {
    const uint8_t *bad_underscore = NULL;
    ptrdiff_t remaining = parser->end - string;

    size_t consumed = yp_strspn_binary_number(string, remaining, &bad_underscore);
    yp_strspn_number_validate(parser, bad_underscore);

    return consumed;
}
// Scan octal digits and underscores starting at string, bounded by
// parser->end, and record an error on the parser for any invalid underscore
// the scan reports. Returns the number of characters scanned.
static size_t
yp_strspn_octal_number_validate(yp_parser_t *parser, const uint8_t *string) {
    const uint8_t *bad_underscore = NULL;
    ptrdiff_t remaining = parser->end - string;

    size_t consumed = yp_strspn_octal_number(string, remaining, &bad_underscore);
    yp_strspn_number_validate(parser, bad_underscore);

    return consumed;
}
// Scan decimal digits and underscores starting at string, bounded by
// parser->end, and record an error on the parser for any invalid underscore
// the scan reports. Returns the number of characters scanned.
static size_t
yp_strspn_decimal_number_validate(yp_parser_t *parser, const uint8_t *string) {
    const uint8_t *bad_underscore = NULL;
    ptrdiff_t remaining = parser->end - string;

    size_t consumed = yp_strspn_decimal_number(string, remaining, &bad_underscore);
    yp_strspn_number_validate(parser, bad_underscore);

    return consumed;
}
// Scan hexadecimal digits and underscores starting at string, bounded by
// parser->end, and record an error on the parser for any invalid underscore
// the scan reports. Returns the number of characters scanned.
static size_t
yp_strspn_hexadecimal_number_validate(yp_parser_t *parser, const uint8_t *string) {
    const uint8_t *bad_underscore = NULL;
    ptrdiff_t remaining = parser->end - string;

    size_t consumed = yp_strspn_hexadecimal_number(string, remaining, &bad_underscore);
    yp_strspn_number_validate(parser, bad_underscore);

    return consumed;
}
static yp_token_type_t
lex_optional_float_suffix(yp_parser_t *parser) {
yp_token_type_t type = YP_TOKEN_INTEGER;
@ -5339,7 +5378,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
if (peek(parser) == '.') {
if (yp_char_is_decimal_digit(peek_offset(parser, 1))) {
parser->current.end += 2;
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
type = YP_TOKEN_FLOAT;
} else {
// If we had a . and then something else, then it's not a float suffix on
@ -5355,7 +5394,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
if (yp_char_is_decimal_digit(*parser->current.end)) {
parser->current.end++;
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
type = YP_TOKEN_FLOAT;
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_FLOAT_EXPONENT);
@ -5377,7 +5416,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case 'D':
parser->current.end++;
if (yp_char_is_decimal_digit(peek(parser))) {
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_DECIMAL);
}
@ -5389,7 +5428,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case 'B':
parser->current.end++;
if (yp_char_is_binary_digit(peek(parser))) {
parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end);
parser->current.end += yp_strspn_binary_number_validate(parser, parser->current.end);
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_BINARY);
}
@ -5402,7 +5441,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case 'O':
parser->current.end++;
if (yp_char_is_octal_digit(peek(parser))) {
parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
parser->current.end += yp_strspn_octal_number_validate(parser, parser->current.end);
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_OCTAL);
}
@ -5420,7 +5459,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case '5':
case '6':
case '7':
parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
parser->current.end += yp_strspn_octal_number_validate(parser, parser->current.end);
parser->integer_base = YP_INTEGER_BASE_FLAGS_OCTAL;
break;
@ -5429,7 +5468,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case 'X':
parser->current.end++;
if (yp_char_is_hexadecimal_digit(peek(parser))) {
parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end);
parser->current.end += yp_strspn_hexadecimal_number_validate(parser, parser->current.end);
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_HEXADECIMAL);
}
@ -5453,18 +5492,12 @@ lex_numeric_prefix(yp_parser_t *parser) {
} else {
// If it didn't start with a 0, then we'll lex as far as we can into a
// decimal number.
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
// Afterward, we'll lex as far as we can into an optional float suffix.
type = lex_optional_float_suffix(parser);
}
// If the last character that we consumed was an underscore, then this is
// actually an invalid integer value, and we should return an invalid token.
if (peek_offset(parser, -1) == '_') {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_NUMBER_LITERAL_UNDERSCORE);
}
return type;
}