[ruby/prism] Warn for maximum number variables

https://github.com/ruby/prism/commit/2cdbf81c95
This commit is contained in:
Kevin Newton 2024-03-13 14:22:31 -04:00 коммит произвёл git
Родитель f0b5d0ad54
Коммит 2cfcebb2a8
4 изменённых файлов: 52 добавлений и 38 удалений

Просмотреть файл

@ -247,6 +247,7 @@ warnings:
- FLOAT_OUT_OF_RANGE
- INTEGER_IN_FLIP_FLOP
- INVALID_CHARACTER
- INVALID_NUMBERED_REFERENCE
- KEYWORD_EOL
- LITERAL_IN_CONDITION_DEFAULT
- LITERAL_IN_CONDITION_VERBOSE
@ -2677,13 +2678,13 @@ nodes:
- name: number
type: uint32
comment: |
The (1-indexed, from the left) number of the capture group. Numbered references that would overflow a `uint32` result in a `number` of exactly `2**32 - 1`.
The (1-indexed, from the left) number of the capture group. Numbered references that are too large result in this value being `0`.
$1 # number `1`
$5432 # number `5432`
$4294967296 # number `4294967295`
$4294967296 # number `0`
comment: |
Represents reading a numbered reference to a capture in the previous match.

Просмотреть файл

@ -10,6 +10,7 @@
#define PRISM_DEFINES_H
#include <ctype.h>
#include <limits.h>
#include <math.h>
#include <stdarg.h>
#include <stddef.h>
@ -22,7 +23,6 @@
* some platforms they aren't included unless this is already defined.
*/
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
/**

Просмотреть файл

@ -1203,40 +1203,6 @@ pm_node_flag_set_repeated_parameter(pm_node_t *node) {
/* Node creation functions */
/******************************************************************************/
/**
 * Convert the run of decimal digits in the byte range [start, end) into a
 * uint32_t. If the digits cannot be converted, or the result does not fit in a
 * uint32_t, an invalid decimal number error is added to the parser and
 * UINT32_MAX is returned. Callers are expected to have already checked that
 * the range contains nothing but decimal digits.
 */
static uint32_t
parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
    ptrdiff_t span = end - start;
    assert(span > 0 && ((unsigned long) span < SIZE_MAX));

    // strtoul requires a NUL-terminated string, so copy the bytes into a
    // temporary buffer with room for the terminator.
    size_t size = (size_t) span;
    char *buffer = xcalloc(size + 1, sizeof(char));
    memcpy(buffer, start, size);
    buffer[size] = '\0';

    char *stop;
    errno = 0;
    unsigned long parsed = strtoul(buffer, &stop, 10);

    // The conversion failed if nothing was consumed, if there were trailing
    // bytes left over, or if the value overflowed an unsigned long.
    int failed = (buffer == stop) || (*stop != '\0') || (errno == ERANGE);
    if (failed) {
        pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
        parsed = UINT32_MAX;
    }

    xfree(buffer);

    // An unsigned long can be wider than 32 bits, so clamp anything that
    // parsed successfully but is still too large for the return type.
    if (parsed > UINT32_MAX) {
        pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
        parsed = UINT32_MAX;
    }

    return (uint32_t) parsed;
}
/**
* When you have an encoding flag on a regular expression, it takes precedence
* over all of the previously set encoding flags. So we need to mask off any
@ -5136,6 +5102,52 @@ pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *loc
return node;
}
/**
 * The maximum numbered reference value is defined as the maximum value that an
 * integer can hold minus 1 bit for CRuby instruction sequence operand tagging.
 */
#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))

/**
 * Parse the decimal number held by a numbered reference token. The first byte
 * of the token is skipped and the remaining bytes are read as decimal digits.
 *
 * Returns 0 if the digits fail to parse, or if the number is greater than the
 * maximum value representable by a numbered reference (NTH_REF_MAX). A number
 * that is merely too large -- including one so large that it overflows an
 * unsigned long -- produces a PM_WARN_INVALID_NUMBERED_REFERENCE warning, not
 * an error. Only digits that cannot be parsed at all produce a
 * PM_ERR_INVALID_NUMBER_DECIMAL error.
 *
 * This function assumes that the range of bytes has already been validated to
 * contain only decimal digits.
 */
static uint32_t
pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
    const uint8_t *start = token->start + 1;
    const uint8_t *end = token->end;

    ptrdiff_t diff = end - start;
    assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
    size_t length = (size_t) diff;

    // strtoul needs a NUL-terminated string, so copy the digits out of the
    // source into a temporary buffer.
    char *digits = xcalloc(length + 1, sizeof(char));
    memcpy(digits, start, length);
    digits[length] = '\0';

    char *endptr;
    errno = 0;
    unsigned long value = strtoul(digits, &endptr, 10);

    // Record the outcome of the conversion before releasing the buffer: endptr
    // points into digits, and freeing memory is allowed to disturb errno.
    const int overflowed = (errno == ERANGE);
    const int malformed = (digits == endptr) || (*endptr != '\0');
    xfree(digits);

    if (malformed) {
        pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
        return 0;
    }

    // Overflowing unsigned long is just another way of being too big, so it
    // takes the same warning path as a value above NTH_REF_MAX. The warning
    // text covers the whole token, hence length + 1 counted from token->start.
    if (overflowed || (value > NTH_REF_MAX)) {
        PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
        return 0;
    }

    return (uint32_t) value;
}

#undef NTH_REF_MAX
/**
* Allocate and initialize a new NthReferenceReadNode node.
*/
@ -5149,7 +5161,7 @@ pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *na
.type = PM_NUMBERED_REFERENCE_READ_NODE,
.location = PM_LOCATION_TOKEN_VALUE(name),
},
.number = parse_decimal_number(parser, name->start + 1, name->end)
.number = pm_numbered_reference_read_node_number(parser, name)
};
return node;

Просмотреть файл

@ -327,6 +327,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_WARN_FLOAT_OUT_OF_RANGE] = { "Float %.*s%s out of range", PM_WARNING_LEVEL_VERBOSE },
[PM_WARN_INTEGER_IN_FLIP_FLOP] = { "integer literal in flip-flop", PM_WARNING_LEVEL_DEFAULT },
[PM_WARN_INVALID_CHARACTER] = { "invalid character syntax; use %s%s%s", PM_WARNING_LEVEL_DEFAULT },
[PM_WARN_INVALID_NUMBERED_REFERENCE] = { "'%.*s' is too big for a number variable, always nil", PM_WARNING_LEVEL_DEFAULT },
[PM_WARN_KEYWORD_EOL] = { "`%.*s` at the end of line without an expression", PM_WARNING_LEVEL_VERBOSE },
[PM_WARN_LITERAL_IN_CONDITION_DEFAULT] = { "%sliteral in %s", PM_WARNING_LEVEL_DEFAULT },
[PM_WARN_LITERAL_IN_CONDITION_VERBOSE] = { "%sliteral in %s", PM_WARNING_LEVEL_VERBOSE },