зеркало из https://github.com/github/ruby.git
[ruby/yarp] Properly handle invalid underscores in number literals
https://github.com/ruby/yarp/commit/35da3d1a4c
This commit is contained in:
Родитель
b848700ccf
Коммит
18780c22f6
|
@ -1237,6 +1237,24 @@ module YARP
|
|||
assert_errors expression(source), source, errors, compare_ripper: false
|
||||
end
|
||||
|
||||
# Doubled and trailing underscores must be reported as errors for every
# integer literal form: plain decimal, 0b, 0o, leading-zero octal, 0d and 0x.
def test_invalid_number_underscores
  expected = ["Invalid underscore placement in number"]

  # Doubled-underscore digits are checked first for every prefix, then the
  # trailing-underscore digits, matching the order of the literal sources.
  %w[1__1 1_1_].each do |digits|
    ["", "0b", "0o", "0", "0d", "0x"].each do |prefix|
      assert_error_messages "#{prefix}#{digits}", expected
    end
  end
end
|
||||
|
||||
private
|
||||
|
||||
def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby")
|
||||
|
|
|
@ -164,6 +164,7 @@ static const char* const diagnostic_messages[YP_DIAGNOSTIC_ID_LEN] = {
|
|||
[YP_ERR_INVALID_NUMBER_DECIMAL] = "Invalid decimal number",
|
||||
[YP_ERR_INVALID_NUMBER_HEXADECIMAL] = "Invalid hexadecimal number",
|
||||
[YP_ERR_INVALID_NUMBER_OCTAL] = "Invalid octal number",
|
||||
[YP_ERR_INVALID_NUMBER_UNDERSCORE] = "Invalid underscore placement in number",
|
||||
[YP_ERR_INVALID_PERCENT] = "Invalid `%` token", // TODO WHAT?
|
||||
[YP_ERR_INVALID_TOKEN] = "Invalid token", // TODO WHAT?
|
||||
[YP_ERR_INVALID_VARIABLE_GLOBAL] = "Invalid global variable",
|
||||
|
|
|
@ -130,6 +130,7 @@ typedef enum {
|
|||
YP_ERR_INVALID_NUMBER_DECIMAL,
|
||||
YP_ERR_INVALID_NUMBER_HEXADECIMAL,
|
||||
YP_ERR_INVALID_NUMBER_OCTAL,
|
||||
YP_ERR_INVALID_NUMBER_UNDERSCORE,
|
||||
YP_ERR_INVALID_PERCENT,
|
||||
YP_ERR_INVALID_TOKEN,
|
||||
YP_ERR_INVALID_VARIABLE_GLOBAL,
|
||||
|
|
|
@ -123,6 +123,9 @@ yp_char_is_inline_whitespace(const uint8_t b) {
|
|||
return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
|
||||
}
|
||||
|
||||
// Scan through the string and return the number of characters at the start of
|
||||
// the string that match the given kind. Disallows searching past the given
|
||||
// maximum number of characters.
|
||||
static inline size_t
|
||||
yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
|
||||
if (length <= 0) return 0;
|
||||
|
@ -134,20 +137,57 @@ yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
|
|||
return size;
|
||||
}
|
||||
|
||||
// Scan through the string and return the number of characters at the start of
|
||||
// the string that match the given kind. Disallows searching past the given
|
||||
// maximum number of characters.
|
||||
//
|
||||
// Additionally, report the location of the last invalid underscore character
|
||||
// found in the string through the out invalid parameter.
|
||||
static inline size_t
|
||||
yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
|
||||
if (length <= 0) return 0;
|
||||
|
||||
size_t size = 0;
|
||||
size_t maximum = (size_t) length;
|
||||
|
||||
bool underscore = false;
|
||||
while (size < maximum && (yp_number_table[string[size]] & kind)) {
|
||||
if (string[size] == '_') {
|
||||
if (underscore) *invalid = string + size;
|
||||
underscore = true;
|
||||
} else {
|
||||
underscore = false;
|
||||
}
|
||||
|
||||
size++;
|
||||
}
|
||||
|
||||
if (string[size - 1] == '_') *invalid = string + size - 1;
|
||||
return size;
|
||||
}
|
||||
|
||||
// Returns the number of characters at the start of the string that are binary
|
||||
// digits or underscores. Disallows searching past the given maximum number of
|
||||
// characters.
|
||||
//
|
||||
// If multiple underscores are found in a row or if an underscore is
|
||||
// found at the end of the number, then *invalid is set to point at the last
|
||||
// invalid underscore.
|
||||
size_t
|
||||
yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length) {
|
||||
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_BINARY_NUMBER);
|
||||
yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
|
||||
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_BINARY_NUMBER);
|
||||
}
|
||||
|
||||
// Returns the number of characters at the start of the string that are octal
|
||||
// digits or underscores. Disallows searching past the given maximum number of
|
||||
// digits or underscores. Disallows searching past the given maximum number of
|
||||
// characters.
|
||||
//
|
||||
// If multiple underscores are found in a row or if an underscore is
|
||||
// found at the end of the number, then *invalid is set to point at the last
|
||||
// invalid underscore.
|
||||
size_t
|
||||
yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length) {
|
||||
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_OCTAL_NUMBER);
|
||||
yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
|
||||
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_OCTAL_NUMBER);
|
||||
}
|
||||
|
||||
// Returns the number of characters at the start of the string that are decimal
|
||||
|
@ -160,9 +200,13 @@ yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
|
|||
// Returns the number of characters at the start of the string that are decimal
|
||||
// digits or underscores. Disallows searching past the given maximum number of
|
||||
// characters.
|
||||
//
|
||||
// If multiple underscores are found in a row or if an underscore is
|
||||
// found at the end of the number, then *invalid is set to point at the last
|
||||
// invalid underscore.
|
||||
size_t
|
||||
yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length) {
|
||||
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_NUMBER);
|
||||
yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
|
||||
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_DECIMAL_NUMBER);
|
||||
}
|
||||
|
||||
// Returns the number of characters at the start of the string that are
|
||||
|
@ -176,9 +220,13 @@ yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
|
|||
// Returns the number of characters at the start of the string that are
|
||||
// hexadecimal digits or underscores. Disallows searching past the given maximum
|
||||
// number of characters.
|
||||
//
|
||||
// If multiple underscores are found in a row or if an underscore is
|
||||
// found at the end of the number, then *invalid is set to point at the last
|
||||
// invalid underscore.
|
||||
size_t
|
||||
yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length) {
|
||||
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
|
||||
yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
|
||||
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
|
|
|
@ -31,19 +31,31 @@ size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
|
|||
size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
|
||||
|
||||
// Returns the number of characters at the start of the string that are octal
|
||||
// digits or underscores. Disallows searching past the given maximum number of
|
||||
// digits or underscores. Disallows searching past the given maximum number of
|
||||
// characters.
|
||||
size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length);
|
||||
//
|
||||
// If multiple underscores are found in a row or if an underscore is
|
||||
// found at the end of the number, then *invalid is set to point at the last
|
||||
// invalid underscore.
|
||||
size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
|
||||
|
||||
// Returns the number of characters at the start of the string that are decimal
|
||||
// digits or underscores. Disallows searching past the given maximum number of
|
||||
// characters.
|
||||
size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length);
|
||||
//
|
||||
// If multiple underscores are found in a row or if an underscore is
|
||||
// found at the end of the number, then *invalid is set to point at the last
|
||||
// invalid underscore.
|
||||
size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
|
||||
|
||||
// Returns the number of characters at the start of the string that are
|
||||
// hexadecimal digits or underscores. Disallows searching past the given maximum
|
||||
// number of characters.
|
||||
size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length);
|
||||
//
|
||||
// If multiple underscores are found in a row or if an underscore is
|
||||
// found at the end of the number, then *invalid is set to point at the last
|
||||
// invalid underscore.
|
||||
size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
|
||||
|
||||
// Returns the number of characters at the start of the string that are regexp
|
||||
// options. Disallows searching past the given maximum number of characters.
|
||||
|
@ -52,7 +64,11 @@ size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
|
|||
// Returns the number of characters at the start of the string that are binary
|
||||
// digits or underscores. Disallows searching past the given maximum number of
|
||||
// characters.
|
||||
size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length);
|
||||
//
|
||||
// If multiple underscores are found in a row or if an underscore is
|
||||
// found at the end of the number, then *invalid is set to point at the last
|
||||
// invalid underscore.
|
||||
size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
|
||||
|
||||
// Returns true if the given character is a whitespace character.
|
||||
bool yp_char_is_whitespace(const uint8_t b);
|
||||
|
|
61
yarp/yarp.c
61
yarp/yarp.c
|
@ -5330,6 +5330,45 @@ context_def_p(yp_parser_t *parser) {
|
|||
/* Specific token lexers */
|
||||
/******************************************************************************/
|
||||
|
||||
static void
|
||||
yp_strspn_number_validate(yp_parser_t *parser, const uint8_t *invalid) {
|
||||
if (invalid != NULL) {
|
||||
yp_diagnostic_list_append(&parser->error_list, invalid, invalid + 1, YP_ERR_INVALID_NUMBER_UNDERSCORE);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
yp_strspn_binary_number_validate(yp_parser_t *parser, const uint8_t *string) {
|
||||
const uint8_t *invalid = NULL;
|
||||
size_t length = yp_strspn_binary_number(string, parser->end - string, &invalid);
|
||||
yp_strspn_number_validate(parser, invalid);
|
||||
return length;
|
||||
}
|
||||
|
||||
static size_t
|
||||
yp_strspn_octal_number_validate(yp_parser_t *parser, const uint8_t *string) {
|
||||
const uint8_t *invalid = NULL;
|
||||
size_t length = yp_strspn_octal_number(string, parser->end - string, &invalid);
|
||||
yp_strspn_number_validate(parser, invalid);
|
||||
return length;
|
||||
}
|
||||
|
||||
static size_t
|
||||
yp_strspn_decimal_number_validate(yp_parser_t *parser, const uint8_t *string) {
|
||||
const uint8_t *invalid = NULL;
|
||||
size_t length = yp_strspn_decimal_number(string, parser->end - string, &invalid);
|
||||
yp_strspn_number_validate(parser, invalid);
|
||||
return length;
|
||||
}
|
||||
|
||||
static size_t
|
||||
yp_strspn_hexadecimal_number_validate(yp_parser_t *parser, const uint8_t *string) {
|
||||
const uint8_t *invalid = NULL;
|
||||
size_t length = yp_strspn_hexadecimal_number(string, parser->end - string, &invalid);
|
||||
yp_strspn_number_validate(parser, invalid);
|
||||
return length;
|
||||
}
|
||||
|
||||
static yp_token_type_t
|
||||
lex_optional_float_suffix(yp_parser_t *parser) {
|
||||
yp_token_type_t type = YP_TOKEN_INTEGER;
|
||||
|
@ -5339,7 +5378,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
|
|||
if (peek(parser) == '.') {
|
||||
if (yp_char_is_decimal_digit(peek_offset(parser, 1))) {
|
||||
parser->current.end += 2;
|
||||
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
|
||||
parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
|
||||
type = YP_TOKEN_FLOAT;
|
||||
} else {
|
||||
// If we had a . and then something else, then it's not a float suffix on
|
||||
|
@ -5355,7 +5394,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
|
|||
|
||||
if (yp_char_is_decimal_digit(*parser->current.end)) {
|
||||
parser->current.end++;
|
||||
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
|
||||
parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
|
||||
type = YP_TOKEN_FLOAT;
|
||||
} else {
|
||||
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_FLOAT_EXPONENT);
|
||||
|
@ -5377,7 +5416,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|||
case 'D':
|
||||
parser->current.end++;
|
||||
if (yp_char_is_decimal_digit(peek(parser))) {
|
||||
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
|
||||
parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
|
||||
} else {
|
||||
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_DECIMAL);
|
||||
}
|
||||
|
@ -5389,7 +5428,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|||
case 'B':
|
||||
parser->current.end++;
|
||||
if (yp_char_is_binary_digit(peek(parser))) {
|
||||
parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end);
|
||||
parser->current.end += yp_strspn_binary_number_validate(parser, parser->current.end);
|
||||
} else {
|
||||
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_BINARY);
|
||||
}
|
||||
|
@ -5402,7 +5441,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|||
case 'O':
|
||||
parser->current.end++;
|
||||
if (yp_char_is_octal_digit(peek(parser))) {
|
||||
parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
|
||||
parser->current.end += yp_strspn_octal_number_validate(parser, parser->current.end);
|
||||
} else {
|
||||
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_OCTAL);
|
||||
}
|
||||
|
@ -5420,7 +5459,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
|
||||
parser->current.end += yp_strspn_octal_number_validate(parser, parser->current.end);
|
||||
parser->integer_base = YP_INTEGER_BASE_FLAGS_OCTAL;
|
||||
break;
|
||||
|
||||
|
@ -5429,7 +5468,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|||
case 'X':
|
||||
parser->current.end++;
|
||||
if (yp_char_is_hexadecimal_digit(peek(parser))) {
|
||||
parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end);
|
||||
parser->current.end += yp_strspn_hexadecimal_number_validate(parser, parser->current.end);
|
||||
} else {
|
||||
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_HEXADECIMAL);
|
||||
}
|
||||
|
@ -5453,18 +5492,12 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|||
} else {
|
||||
// If it didn't start with a 0, then we'll lex as far as we can into a
|
||||
// decimal number.
|
||||
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
|
||||
parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
|
||||
|
||||
// Afterward, we'll lex as far as we can into an optional float suffix.
|
||||
type = lex_optional_float_suffix(parser);
|
||||
}
|
||||
|
||||
// If the last character that we consumed was an underscore, then this is
|
||||
// actually an invalid integer value, and we should return an invalid token.
|
||||
if (peek_offset(parser, -1) == '_') {
|
||||
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_NUMBER_LITERAL_UNDERSCORE);
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче