зеркало из https://github.com/github/ruby.git
[ruby/prism] Documentation for diagnostics and regexp
https://github.com/ruby/prism/commit/16e0579044
This commit is contained in:
Родитель
affa6714bc
Коммит
1de05631b5
|
@ -1,56 +1,55 @@
|
|||
#include "prism/diagnostic.h"
|
||||
|
||||
/*
|
||||
## Message composition
|
||||
|
||||
When composing an error message, use sentence fragments.
|
||||
|
||||
Try describing the property of the code that caused the error, rather than the rule that is being
|
||||
violated. It may help to use a fragment that completes a sentence beginning, "The parser
|
||||
encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
|
||||
context) after a semicolon.
|
||||
|
||||
For example, instead of "Control escape sequence cannot be doubled", prefer:
|
||||
|
||||
> "Invalid control escape sequence; control cannot be repeated"
|
||||
|
||||
In some cases, where the failure is more general or syntax expectations are violated, it may make
|
||||
more sense to use a fragment that completes a sentence beginning, "The parser ...".
|
||||
|
||||
For example:
|
||||
|
||||
> "Expected an expression after `(`"
|
||||
> "Cannot parse the expression"
|
||||
|
||||
|
||||
## Message style guide
|
||||
|
||||
- Use articles like "a", "an", and "the" when appropriate.
|
||||
- e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
|
||||
- Use the common name for tokens and nodes.
|
||||
- e.g., prefer "keyword splat" to "assoc splat"
|
||||
- e.g., prefer "embedded document" to "embdoc"
|
||||
- Capitalize the initial word of the message.
|
||||
- Use back ticks around token literals
|
||||
- e.g., "Expected a `=>` between the hash key and value"
|
||||
- Do not use `.` or other punctuation at the end of the message.
|
||||
- Do not use contractions like "can't". Prefer "cannot" to "can not".
|
||||
- For tokens that can have multiple meanings, reference the token and its meaning.
|
||||
- e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
|
||||
|
||||
|
||||
## Error names (PM_ERR_*)
|
||||
|
||||
- When appropriate, prefer node name to token name.
|
||||
- e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
|
||||
- Prefer token name to common name.
|
||||
- e.g., prefer "STAR" to "ASTERISK".
|
||||
- Try to order the words in the name from more general to more specific,
|
||||
- e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
|
||||
- When in doubt, look for similar patterns and name them so that they are grouped when lexically
|
||||
sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
|
||||
*/
|
||||
|
||||
/**
|
||||
* ## Message composition
|
||||
*
|
||||
* When composing an error message, use sentence fragments.
|
||||
*
|
||||
* Try describing the property of the code that caused the error, rather than the rule that is being
|
||||
* violated. It may help to use a fragment that completes a sentence beginning, "The parser
|
||||
* encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
|
||||
* context) after a semicolon.
|
||||
*
|
||||
* For example, instead of "Control escape sequence cannot be doubled", prefer:
|
||||
*
|
||||
* > "Invalid control escape sequence; control cannot be repeated"
|
||||
*
|
||||
* In some cases, where the failure is more general or syntax expectations are violated, it may make
|
||||
* more sense to use a fragment that completes a sentence beginning, "The parser ...".
|
||||
*
|
||||
* For example:
|
||||
*
|
||||
* > "Expected an expression after `(`"
|
||||
* > "Cannot parse the expression"
|
||||
*
|
||||
*
|
||||
* ## Message style guide
|
||||
*
|
||||
* - Use articles like "a", "an", and "the" when appropriate.
|
||||
* - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
|
||||
* - Use the common name for tokens and nodes.
|
||||
* - e.g., prefer "keyword splat" to "assoc splat"
|
||||
* - e.g., prefer "embedded document" to "embdoc"
|
||||
* - Capitalize the initial word of the message.
|
||||
* - Use back ticks around token literals
|
||||
* - e.g., "Expected a `=>` between the hash key and value"
|
||||
* - Do not use `.` or other punctuation at the end of the message.
|
||||
* - Do not use contractions like "can't". Prefer "cannot" to "can not".
|
||||
* - For tokens that can have multiple meanings, reference the token and its meaning.
|
||||
* - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
|
||||
*
|
||||
*
|
||||
* ## Error names (PM_ERR_*)
|
||||
*
|
||||
* - When appropriate, prefer node name to token name.
|
||||
* - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
|
||||
* - Prefer token name to common name.
|
||||
* - e.g., prefer "STAR" to "ASTERISK".
|
||||
* - Try to order the words in the name from more general to more specific,
|
||||
* - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
|
||||
* - When in doubt, look for similar patterns and name them so that they are grouped when lexically
|
||||
* sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
|
||||
*/
|
||||
static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
|
||||
[PM_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
|
||||
[PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
|
||||
|
@ -263,7 +262,9 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
|
|||
return message;
|
||||
}
|
||||
|
||||
// Append an error to the given list of diagnostics.
|
||||
/**
|
||||
* Append an error to the given list of diagnostics.
|
||||
*/
|
||||
bool
|
||||
pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
||||
pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1);
|
||||
|
@ -274,7 +275,9 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *
|
|||
return true;
|
||||
}
|
||||
|
||||
// Deallocate the internal state of the given diagnostic list.
|
||||
/**
|
||||
* Deallocate the internal state of the given diagnostic list.
|
||||
*/
|
||||
void
|
||||
pm_diagnostic_list_free(pm_list_t *list) {
|
||||
pm_list_node_t *node, *next;
|
||||
|
|
|
@ -20,6 +20,10 @@ typedef struct {
|
|||
const char *message;
|
||||
} pm_diagnostic_t;
|
||||
|
||||
/**
|
||||
* The diagnostic IDs of all of the diagnostics, used to communicate the types
|
||||
* of errors between the parser and the user.
|
||||
*/
|
||||
typedef enum {
|
||||
PM_ERR_ALIAS_ARGUMENT,
|
||||
PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
|
||||
|
@ -223,14 +227,27 @@ typedef enum {
|
|||
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
|
||||
PM_WARN_AMBIGUOUS_PREFIX_STAR,
|
||||
PM_WARN_AMBIGUOUS_SLASH,
|
||||
|
||||
/* This must be the last member. */
|
||||
PM_DIAGNOSTIC_ID_LEN,
|
||||
} pm_diagnostic_id_t;
|
||||
|
||||
// Append a diagnostic to the given list of diagnostics.
|
||||
/**
|
||||
* Append a diagnostic to the given list of diagnostics.
|
||||
*
|
||||
* @param list The list to append to.
|
||||
* @param start The start of the diagnostic.
|
||||
* @param end The end of the diagnostic.
|
||||
* @param diag_id The diagnostic ID.
|
||||
* @return Whether the diagnostic was successfully appended.
|
||||
*/
|
||||
bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
|
||||
|
||||
// Deallocate the internal state of the given diagnostic list.
|
||||
/**
|
||||
* Deallocate the internal state of the given diagnostic list.
|
||||
*
|
||||
* @param list The list to deallocate.
|
||||
*/
|
||||
void pm_diagnostic_list_free(pm_list_t *list);
|
||||
|
||||
#endif
|
||||
|
|
234
prism/regexp.c
234
prism/regexp.c
|
@ -1,6 +1,8 @@
|
|||
#include "prism/regexp.h"
|
||||
|
||||
// This is the parser that is going to handle parsing regular expressions.
|
||||
/**
|
||||
* This is the parser that is going to handle parsing regular expressions.
|
||||
*/
|
||||
typedef struct {
|
||||
const uint8_t *start;
|
||||
const uint8_t *cursor;
|
||||
|
@ -10,7 +12,9 @@ typedef struct {
|
|||
pm_encoding_t *encoding;
|
||||
} pm_regexp_parser_t;
|
||||
|
||||
// This initializes a new parser with the given source.
|
||||
/**
|
||||
* This initializes a new parser with the given source.
|
||||
*/
|
||||
static void
|
||||
pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
|
||||
*parser = (pm_regexp_parser_t) {
|
||||
|
@ -23,7 +27,9 @@ pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const ui
|
|||
};
|
||||
}
|
||||
|
||||
// This appends a new string to the list of named captures.
|
||||
/**
|
||||
* This appends a new string to the list of named captures.
|
||||
*/
|
||||
static void
|
||||
pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
||||
pm_string_t string;
|
||||
|
@ -32,13 +38,17 @@ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start,
|
|||
pm_string_free(&string);
|
||||
}
|
||||
|
||||
// Returns true if the next character is the end of the source.
|
||||
/**
|
||||
* Returns true if the next character is the end of the source.
|
||||
*/
|
||||
static inline bool
|
||||
pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
|
||||
return parser->cursor >= parser->end;
|
||||
}
|
||||
|
||||
// Optionally accept a char and consume it if it exists.
|
||||
/**
|
||||
* Optionally accept a char and consume it if it exists.
|
||||
*/
|
||||
static inline bool
|
||||
pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
|
||||
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
||||
|
@ -48,7 +58,9 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Expect a character to be present and consume it.
|
||||
/**
|
||||
* Expect a character to be present and consume it.
|
||||
*/
|
||||
static inline bool
|
||||
pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
|
||||
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
||||
|
@ -58,7 +70,9 @@ pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// This advances the current token to the next instance of the given character.
|
||||
/**
|
||||
* This advances the current token to the next instance of the given character.
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
|
||||
if (pm_regexp_char_is_eof(parser)) {
|
||||
|
@ -74,37 +88,39 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Range quantifiers are a special class of quantifiers that look like
|
||||
//
|
||||
// * {digit}
|
||||
// * {digit,}
|
||||
// * {digit,digit}
|
||||
// * {,digit}
|
||||
//
|
||||
// Unfortunately, if there are any spaces in between, then this just becomes a
|
||||
// regular character match expression and we have to backtrack. So when this
|
||||
// function first starts running, we'll create a "save" point and then attempt
|
||||
// to parse the quantifier. If it fails, we'll restore the save point and
|
||||
// return.
|
||||
//
|
||||
// To properly track everything, we're going to build a little state machine.
|
||||
// It looks something like the following:
|
||||
//
|
||||
// ┌───────┐ ┌─────────┐ ────────────┐
|
||||
// ──── lbrace ───> │ start │ ──── digit ───> │ minimum │ │
|
||||
// └───────┘ └─────────┘ <─── digit ─┘
|
||||
// │ │ │
|
||||
// ┌───────┐ │ │ rbrace
|
||||
// │ comma │ <───── comma ┌──── comma ───────┘ │
|
||||
// └───────┘ V V
|
||||
// │ ┌─────────┐ ┌─────────┐
|
||||
// └── digit ──> │ maximum │ ── rbrace ──> │| final |│
|
||||
// └─────────┘ └─────────┘
|
||||
// │ ^
|
||||
// └─ digit ─┘
|
||||
//
|
||||
// Note that by the time we've hit this function, the lbrace has already been
|
||||
// consumed so we're in the start state.
|
||||
/**
|
||||
* Range quantifiers are a special class of quantifiers that look like
|
||||
*
|
||||
* * {digit}
|
||||
* * {digit,}
|
||||
* * {digit,digit}
|
||||
* * {,digit}
|
||||
*
|
||||
* Unfortunately, if there are any spaces in between, then this just becomes a
|
||||
* regular character match expression and we have to backtrack. So when this
|
||||
* function first starts running, we'll create a "save" point and then attempt
|
||||
* to parse the quantifier. If it fails, we'll restore the save point and
|
||||
* return.
|
||||
*
|
||||
* To properly track everything, we're going to build a little state machine.
|
||||
* It looks something like the following:
|
||||
*
|
||||
* ┌───────┐ ┌─────────┐ ────────────┐
|
||||
* ──── lbrace ───> │ start │ ──── digit ───> │ minimum │ │
|
||||
* └───────┘ └─────────┘ <─── digit ─┘
|
||||
* │ │ │
|
||||
* ┌───────┐ │ │ rbrace
|
||||
* │ comma │ <───── comma ┌──── comma ───────┘ │
|
||||
* └───────┘ V V
|
||||
* │ ┌─────────┐ ┌─────────┐
|
||||
* └── digit ──> │ maximum │ ── rbrace ──> │| final |│
|
||||
* └─────────┘ └─────────┘
|
||||
* │ ^
|
||||
* └─ digit ─┘
|
||||
*
|
||||
* Note that by the time we've hit this function, the lbrace has already been
|
||||
* consumed so we're in the start state.
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
|
||||
const uint8_t *savepoint = parser->cursor;
|
||||
|
@ -180,12 +196,14 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// quantifier : star-quantifier
|
||||
// | plus-quantifier
|
||||
// | optional-quantifier
|
||||
// | range-quantifier
|
||||
// | <empty>
|
||||
// ;
|
||||
/**
|
||||
* quantifier : star-quantifier
|
||||
* | plus-quantifier
|
||||
* | optional-quantifier
|
||||
* | range-quantifier
|
||||
* | <empty>
|
||||
* ;
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
|
||||
if (pm_regexp_char_is_eof(parser)) return true;
|
||||
|
@ -205,8 +223,10 @@ pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
|
|||
}
|
||||
}
|
||||
|
||||
// match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
|
||||
// ;
|
||||
/**
|
||||
* match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
|
||||
* ;
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
|
||||
if (!pm_regexp_char_expect(parser, ':')) {
|
||||
|
@ -226,8 +246,10 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
|
|||
static bool
|
||||
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
|
||||
|
||||
// match-char-set : '[' '^'? (match-range | match-char)* ']'
|
||||
// ;
|
||||
/**
|
||||
* match-char-set : '[' '^'? (match-range | match-char)* ']'
|
||||
* ;
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
|
||||
pm_regexp_char_accept(parser, '^');
|
||||
|
@ -251,7 +273,9 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
|
|||
return pm_regexp_char_expect(parser, ']');
|
||||
}
|
||||
|
||||
// A left bracket can either mean a POSIX class or a character set.
|
||||
/**
|
||||
* A left bracket can either mean a POSIX class or a character set.
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
|
||||
const uint8_t *reset = parser->cursor;
|
||||
|
@ -271,8 +295,10 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
|
|||
static bool
|
||||
pm_regexp_parse_expression(pm_regexp_parser_t *parser);
|
||||
|
||||
// These are the states of the options that are configurable on the regular
|
||||
// expression (or from within a group).
|
||||
/**
|
||||
* These are the states of the options that are configurable on the regular
|
||||
* expression (or from within a group).
|
||||
*/
|
||||
typedef enum {
|
||||
PM_REGEXP_OPTION_STATE_INVALID,
|
||||
PM_REGEXP_OPTION_STATE_TOGGLEABLE,
|
||||
|
@ -283,16 +309,21 @@ typedef enum {
|
|||
|
||||
// These are the options that are configurable on the regular expression (or
|
||||
// from within a group).
|
||||
|
||||
#define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
|
||||
#define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
|
||||
#define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
|
||||
|
||||
// This is the set of options that are configurable on the regular expression.
|
||||
/**
|
||||
* This is the set of options that are configurable on the regular expression.
|
||||
*/
|
||||
typedef struct {
|
||||
uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
|
||||
} pm_regexp_options_t;
|
||||
|
||||
// Initialize a new set of options to their default values.
|
||||
/**
|
||||
* Initialize a new set of options to their default values.
|
||||
*/
|
||||
static void
|
||||
pm_regexp_options_init(pm_regexp_options_t *options) {
|
||||
memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
|
||||
|
@ -304,8 +335,10 @@ pm_regexp_options_init(pm_regexp_options_t *options) {
|
|||
options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
|
||||
}
|
||||
|
||||
// Attempt to add the given option to the set of options. Returns true if it was
|
||||
// added, false if it was already present.
|
||||
/**
|
||||
* Attempt to add the given option to the set of options. Returns true if it was
|
||||
* added, false if it was already present.
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
|
||||
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
||||
|
@ -327,8 +360,10 @@ pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Attempt to remove the given option from the set of options. Returns true if
|
||||
// it was removed, false if it was already absent.
|
||||
/**
|
||||
* Attempt to remove the given option from the set of options. Returns true if
|
||||
* it was removed, false if it was already absent.
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
||||
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
||||
|
@ -349,26 +384,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Groups can have quite a few different patterns for syntax. They basically
|
||||
// just wrap a set of expressions, but they can potentially have options after a
|
||||
// question mark. If there _isn't_ a question mark, then it's just a set of
|
||||
// expressions. If there _is_, then here are the options:
|
||||
//
|
||||
// * (?#...) - inline comments
|
||||
// * (?:subexp) - non-capturing group
|
||||
// * (?=subexp) - positive lookahead
|
||||
// * (?!subexp) - negative lookahead
|
||||
// * (?>subexp) - atomic group
|
||||
// * (?~subexp) - absence operator
|
||||
// * (?<=subexp) - positive lookbehind
|
||||
// * (?<!subexp) - negative lookbehind
|
||||
// * (?<name>subexp) - named capturing group
|
||||
// * (?'name'subexp) - named capturing group
|
||||
// * (?(cond)yes-subexp) - conditional expression
|
||||
// * (?(cond)yes-subexp|no-subexp) - conditional expression
|
||||
// * (?imxdau-imx) - turn on and off configuration
|
||||
// * (?imxdau-imx:subexp) - turn on and off configuration for an expression
|
||||
//
|
||||
/**
|
||||
* Groups can have quite a few different patterns for syntax. They basically
|
||||
* just wrap a set of expressions, but they can potentially have options after a
|
||||
* question mark. If there _isn't_ a question mark, then it's just a set of
|
||||
* expressions. If there _is_, then here are the options:
|
||||
*
|
||||
* * (?#...) - inline comments
|
||||
* * (?:subexp) - non-capturing group
|
||||
* * (?=subexp) - positive lookahead
|
||||
* * (?!subexp) - negative lookahead
|
||||
* * (?>subexp) - atomic group
|
||||
* * (?~subexp) - absence operator
|
||||
* * (?<=subexp) - positive lookbehind
|
||||
* * (?<!subexp) - negative lookbehind
|
||||
* * (?<name>subexp) - named capturing group
|
||||
* * (?'name'subexp) - named capturing group
|
||||
* * (?(cond)yes-subexp) - conditional expression
|
||||
* * (?(cond)yes-subexp|no-subexp) - conditional expression
|
||||
* * (?imxdau-imx) - turn on and off configuration
|
||||
* * (?imxdau-imx:subexp) - turn on and off configuration for an expression
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
||||
// First, parse any options for the group.
|
||||
|
@ -503,16 +539,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
|||
return pm_regexp_char_expect(parser, ')');
|
||||
}
|
||||
|
||||
// item : anchor
|
||||
// | match-posix-class
|
||||
// | match-char-set
|
||||
// | match-char-class
|
||||
// | match-char-prop
|
||||
// | match-char
|
||||
// | match-any
|
||||
// | group
|
||||
// | quantified
|
||||
// ;
|
||||
/**
|
||||
* item : anchor
|
||||
* | match-posix-class
|
||||
* | match-char-set
|
||||
* | match-char-class
|
||||
* | match-char-prop
|
||||
* | match-char
|
||||
* | match-any
|
||||
* | group
|
||||
* | quantified
|
||||
* ;
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_parse_item(pm_regexp_parser_t *parser) {
|
||||
switch (*parser->cursor++) {
|
||||
|
@ -533,8 +571,10 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
|
|||
}
|
||||
}
|
||||
|
||||
// expression : item+
|
||||
// ;
|
||||
/**
|
||||
* expression : item+
|
||||
* ;
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
|
||||
if (!pm_regexp_parse_item(parser)) {
|
||||
|
@ -550,10 +590,12 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// pattern : EOF
|
||||
// | expression EOF
|
||||
// | expression '|' pattern
|
||||
// ;
|
||||
/**
|
||||
* pattern : EOF
|
||||
* | expression EOF
|
||||
* | expression '|' pattern
|
||||
* ;
|
||||
*/
|
||||
static bool
|
||||
pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
|
||||
return (
|
||||
|
@ -572,8 +614,10 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
|
|||
);
|
||||
}
|
||||
|
||||
// Parse a regular expression and extract the names of all of the named capture
|
||||
// groups.
|
||||
/**
|
||||
* Parse a regular expression and extract the names of all of the named capture
|
||||
* groups.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION bool
|
||||
pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
|
||||
pm_regexp_parser_t parser;
|
||||
|
|
|
@ -12,8 +12,17 @@
|
|||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
// Parse a regular expression and extract the names of all of the named capture
|
||||
// groups.
|
||||
/**
|
||||
* Parse a regular expression and extract the names of all of the named capture
|
||||
* groups.
|
||||
*
|
||||
* @param source The source code to parse.
|
||||
* @param size The size of the source code.
|
||||
* @param named_captures The list to add the names of the named capture groups.
|
||||
* @param encoding_changed Whether or not the encoding changed from the default.
|
||||
* @param encoding The encoding of the source code.
|
||||
* @return Whether or not the parsing was successful.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
|
||||
|
||||
#endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче