diff --git a/test/yarp/errors_test.rb b/test/yarp/errors_test.rb index 3539c54cf4..54cbda14d1 100644 --- a/test/yarp/errors_test.rb +++ b/test/yarp/errors_test.rb @@ -1237,6 +1237,24 @@ module YARP assert_errors expression(source), source, errors, compare_ripper: false end + def test_invalid_number_underscores + error_messages = ["Invalid underscore placement in number"] + + assert_error_messages "1__1", error_messages + assert_error_messages "0b1__1", error_messages + assert_error_messages "0o1__1", error_messages + assert_error_messages "01__1", error_messages + assert_error_messages "0d1__1", error_messages + assert_error_messages "0x1__1", error_messages + + assert_error_messages "1_1_", error_messages + assert_error_messages "0b1_1_", error_messages + assert_error_messages "0o1_1_", error_messages + assert_error_messages "01_1_", error_messages + assert_error_messages "0d1_1_", error_messages + assert_error_messages "0x1_1_", error_messages + end + private def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby") diff --git a/yarp/diagnostic.c b/yarp/diagnostic.c index b6436f135c..9bbc30edee 100644 --- a/yarp/diagnostic.c +++ b/yarp/diagnostic.c @@ -164,6 +164,7 @@ static const char* const diagnostic_messages[YP_DIAGNOSTIC_ID_LEN] = { [YP_ERR_INVALID_NUMBER_DECIMAL] = "Invalid decimal number", [YP_ERR_INVALID_NUMBER_HEXADECIMAL] = "Invalid hexadecimal number", [YP_ERR_INVALID_NUMBER_OCTAL] = "Invalid octal number", + [YP_ERR_INVALID_NUMBER_UNDERSCORE] = "Invalid underscore placement in number", [YP_ERR_INVALID_PERCENT] = "Invalid `%` token", // TODO WHAT? [YP_ERR_INVALID_TOKEN] = "Invalid token", // TODO WHAT? 
[YP_ERR_INVALID_VARIABLE_GLOBAL] = "Invalid global variable", diff --git a/yarp/diagnostic.h b/yarp/diagnostic.h index 9aa21b0b0c..a4b030adfd 100644 --- a/yarp/diagnostic.h +++ b/yarp/diagnostic.h @@ -130,6 +130,7 @@ typedef enum { YP_ERR_INVALID_NUMBER_DECIMAL, YP_ERR_INVALID_NUMBER_HEXADECIMAL, YP_ERR_INVALID_NUMBER_OCTAL, + YP_ERR_INVALID_NUMBER_UNDERSCORE, YP_ERR_INVALID_PERCENT, YP_ERR_INVALID_TOKEN, YP_ERR_INVALID_VARIABLE_GLOBAL, diff --git a/yarp/util/yp_char.c b/yarp/util/yp_char.c index ae0ffea6b8..42c3896626 100644 --- a/yarp/util/yp_char.c +++ b/yarp/util/yp_char.c @@ -123,6 +123,9 @@ yp_char_is_inline_whitespace(const uint8_t b) { return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE); } +// Scan through the string and return the number of characters at the start of +// the string that match the given kind. Disallows searching past the given +// maximum number of characters. static inline size_t yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { if (length <= 0) return 0; @@ -134,20 +137,57 @@ yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { return size; } +// Scan through the string and return the number of characters at the start of +// the string that match the given kind. Disallows searching past the given +// maximum number of characters. +// +// Additionally, report the location of the last invalid underscore character +// found in the string through the out invalid parameter. 
+static inline size_t +yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) { + if (length <= 0) return 0; + + size_t size = 0; + size_t maximum = (size_t) length; + + bool underscore = false; + while (size < maximum && (yp_number_table[string[size]] & kind)) { + if (string[size] == '_') { + if (underscore) *invalid = string + size; + underscore = true; + } else { + underscore = false; + } + + size++; + } + + if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1; + return size; +} + // Returns the number of characters at the start of the string that are binary // digits or underscores. Disallows searching past the given maximum number of // characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the last invalid underscore. size_t -yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length) { - return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_BINARY_NUMBER); +yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { + return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_BINARY_NUMBER); } // Returns the number of characters at the start of the string that are octal -// digits or underscores. Disallows searching past the given maximum number of +// digits or underscores. Disallows searching past the given maximum number of // characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the last invalid underscore.
size_t -yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length) { - return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_OCTAL_NUMBER); +yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { + return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_OCTAL_NUMBER); } // Returns the number of characters at the start of the string that are decimal @@ -160,9 +200,13 @@ yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) { // Returns the number of characters at the start of the string that are decimal // digits or underscores. Disallows searching past the given maximum number of // characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the last invalid underscore. size_t -yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length) { - return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_NUMBER); +yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { + return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_DECIMAL_NUMBER); } // Returns the number of characters at the start of the string that are @@ -176,9 +220,13 @@ yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) { // Returns the number of characters at the start of the string that are // hexadecimal digits or underscores. Disallows searching past the given maximum // number of characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the last invalid underscore.
size_t -yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length) { - return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_NUMBER); +yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { + return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_HEXADECIMAL_NUMBER); } static inline bool diff --git a/yarp/util/yp_char.h b/yarp/util/yp_char.h index e155b69d64..f08d6a8c9d 100644 --- a/yarp/util/yp_char.h +++ b/yarp/util/yp_char.h @@ -31,19 +31,31 @@ size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length); size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length); // Returns the number of characters at the start of the string that are octal -// digits or underscores. Disallows searching past the given maximum number of +// digits or underscores. Disallows searching past the given maximum number of // characters. -size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length); +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the last invalid underscore. +size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); // Returns the number of characters at the start of the string that are decimal // digits or underscores. Disallows searching past the given maximum number of // characters. -size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length); +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the last invalid underscore. +size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); // Returns the number of characters at the start of the string that are // hexadecimal digits or underscores.
Disallows searching past the given maximum // number of characters. -size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length); +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the last invalid underscore. +size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); // Returns the number of characters at the start of the string that are regexp // options. Disallows searching past the given maximum number of characters. @@ -52,7 +64,11 @@ size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length); // Returns the number of characters at the start of the string that are binary // digits or underscores. Disallows searching past the given maximum number of // characters. -size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length); +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the last invalid underscore. +size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); // Returns true if the given character is a whitespace character.
bool yp_char_is_whitespace(const uint8_t b); diff --git a/yarp/yarp.c b/yarp/yarp.c index bf3b3b79cb..b343566ee0 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -5330,6 +5330,45 @@ context_def_p(yp_parser_t *parser) { /* Specific token lexers */ /******************************************************************************/ +static void +yp_strspn_number_validate(yp_parser_t *parser, const uint8_t *invalid) { + if (invalid != NULL) { + yp_diagnostic_list_append(&parser->error_list, invalid, invalid + 1, YP_ERR_INVALID_NUMBER_UNDERSCORE); + } +} + +static size_t +yp_strspn_binary_number_validate(yp_parser_t *parser, const uint8_t *string) { + const uint8_t *invalid = NULL; + size_t length = yp_strspn_binary_number(string, parser->end - string, &invalid); + yp_strspn_number_validate(parser, invalid); + return length; +} + +static size_t +yp_strspn_octal_number_validate(yp_parser_t *parser, const uint8_t *string) { + const uint8_t *invalid = NULL; + size_t length = yp_strspn_octal_number(string, parser->end - string, &invalid); + yp_strspn_number_validate(parser, invalid); + return length; +} + +static size_t +yp_strspn_decimal_number_validate(yp_parser_t *parser, const uint8_t *string) { + const uint8_t *invalid = NULL; + size_t length = yp_strspn_decimal_number(string, parser->end - string, &invalid); + yp_strspn_number_validate(parser, invalid); + return length; +} + +static size_t +yp_strspn_hexadecimal_number_validate(yp_parser_t *parser, const uint8_t *string) { + const uint8_t *invalid = NULL; + size_t length = yp_strspn_hexadecimal_number(string, parser->end - string, &invalid); + yp_strspn_number_validate(parser, invalid); + return length; +} + static yp_token_type_t lex_optional_float_suffix(yp_parser_t *parser) { yp_token_type_t type = YP_TOKEN_INTEGER; @@ -5339,7 +5378,7 @@ lex_optional_float_suffix(yp_parser_t *parser) { if (peek(parser) == '.') { if (yp_char_is_decimal_digit(peek_offset(parser, 1))) { parser->current.end += 2; - parser->current.end += 
yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end); + parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end); type = YP_TOKEN_FLOAT; } else { // If we had a . and then something else, then it's not a float suffix on @@ -5355,7 +5394,7 @@ lex_optional_float_suffix(yp_parser_t *parser) { if (yp_char_is_decimal_digit(*parser->current.end)) { parser->current.end++; - parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end); + parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end); type = YP_TOKEN_FLOAT; } else { yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_FLOAT_EXPONENT); @@ -5377,7 +5416,7 @@ lex_numeric_prefix(yp_parser_t *parser) { case 'D': parser->current.end++; if (yp_char_is_decimal_digit(peek(parser))) { - parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end); + parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end); } else { yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_DECIMAL); } @@ -5389,7 +5428,7 @@ lex_numeric_prefix(yp_parser_t *parser) { case 'B': parser->current.end++; if (yp_char_is_binary_digit(peek(parser))) { - parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end); + parser->current.end += yp_strspn_binary_number_validate(parser, parser->current.end); } else { yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_BINARY); } @@ -5402,7 +5441,7 @@ lex_numeric_prefix(yp_parser_t *parser) { case 'O': parser->current.end++; if (yp_char_is_octal_digit(peek(parser))) { - parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end); + parser->current.end += 
yp_strspn_octal_number_validate(parser, parser->current.end); } else { yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_OCTAL); } @@ -5420,7 +5459,7 @@ lex_numeric_prefix(yp_parser_t *parser) { case '5': case '6': case '7': - parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end); + parser->current.end += yp_strspn_octal_number_validate(parser, parser->current.end); parser->integer_base = YP_INTEGER_BASE_FLAGS_OCTAL; break; @@ -5429,7 +5468,7 @@ lex_numeric_prefix(yp_parser_t *parser) { case 'X': parser->current.end++; if (yp_char_is_hexadecimal_digit(peek(parser))) { - parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end); + parser->current.end += yp_strspn_hexadecimal_number_validate(parser, parser->current.end); } else { yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_HEXADECIMAL); } @@ -5453,18 +5492,12 @@ lex_numeric_prefix(yp_parser_t *parser) { } else { // If it didn't start with a 0, then we'll lex as far as we can into a // decimal number. - parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end); + parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end); // Afterward, we'll lex as far as we can into an optional float suffix. type = lex_optional_float_suffix(parser); } - // If the last character that we consumed was an underscore, then this is - // actually an invalid integer value, and we should return an invalid token. - if (peek_offset(parser, -1) == '_') { - yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_NUMBER_LITERAL_UNDERSCORE); - } - return type; }