зеркало из https://github.com/github/ruby.git
[PRISM] Implement regex encoding flags
Added the correct encoding to the allocated regex. This required adding a new function that determines the encoding and passing the result to `rb_enc_reg_new` instead of calling `rb_reg_new`, which would set the encoding to ASCII8BIT regardless of the encoding flag.
This commit is contained in:
Родитель
2573d56884
Коммит
a082e560bb
|
@ -158,7 +158,7 @@ parse_imaginary(pm_imaginary_node_t *node)
|
|||
}
|
||||
|
||||
static inline VALUE
|
||||
parse_string(pm_string_t *string, pm_parser_t *parser)
|
||||
parse_string(pm_string_t *string, const pm_parser_t *parser)
|
||||
{
|
||||
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding.name));
|
||||
return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), enc);
|
||||
|
@ -190,6 +190,8 @@ pm_optimizable_range_item_p(pm_node_t *node)
|
|||
return (!node || PM_NODE_TYPE_P(node, PM_INTEGER_NODE) || PM_NODE_TYPE_P(node, PM_NIL_NODE));
|
||||
}
|
||||
|
||||
/* Number of bits by which pm_reg_flags left-shifts the encoding option
 * character ('e', 's' or 'u') before OR-ing it into the regexp flags. */
#define RE_OPTION_ENCODING_SHIFT 8
|
||||
|
||||
/**
|
||||
* Check the prism flags of a regular expression-like node and return the flags
|
||||
* that are expected by the CRuby VM.
|
||||
|
@ -197,6 +199,29 @@ pm_optimizable_range_item_p(pm_node_t *node)
|
|||
static int
|
||||
pm_reg_flags(const pm_node_t *node) {
|
||||
int flags = 0;
|
||||
int dummy = 0;
|
||||
|
||||
// Check "no encoding" first so that flags don't get clobbered
|
||||
// We're calling `rb_char_to_option_kcode` in this case so that
|
||||
// we don't need to have access to `ARG_ENCODING_NONE`
|
||||
if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
|
||||
rb_char_to_option_kcode('n', &flags, &dummy);
|
||||
}
|
||||
|
||||
if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
|
||||
rb_char_to_option_kcode('e', &flags, &dummy);
|
||||
flags |= ('e' << RE_OPTION_ENCODING_SHIFT);
|
||||
}
|
||||
|
||||
if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
|
||||
rb_char_to_option_kcode('s', &flags, &dummy);
|
||||
flags |= ('s' << RE_OPTION_ENCODING_SHIFT);
|
||||
}
|
||||
|
||||
if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
|
||||
rb_char_to_option_kcode('u', &flags, &dummy);
|
||||
flags |= ('u' << RE_OPTION_ENCODING_SHIFT);
|
||||
}
|
||||
|
||||
if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE) {
|
||||
flags |= ONIG_OPTION_IGNORECASE;
|
||||
|
@ -213,6 +238,27 @@ pm_reg_flags(const pm_node_t *node) {
|
|||
return flags;
|
||||
}
|
||||
|
||||
static rb_encoding *
|
||||
pm_reg_enc(const pm_regular_expression_node_t *node, const pm_parser_t *parser) {
|
||||
if (node->base.flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
|
||||
return rb_ascii8bit_encoding();
|
||||
}
|
||||
|
||||
if (node->base.flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
|
||||
return rb_enc_get_from_index(ENCINDEX_EUC_JP);
|
||||
}
|
||||
|
||||
if (node->base.flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
|
||||
return rb_enc_get_from_index(ENCINDEX_Windows_31J);
|
||||
}
|
||||
|
||||
if (node->base.flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
|
||||
return rb_utf8_encoding();
|
||||
}
|
||||
|
||||
return rb_enc_from_index(rb_enc_find_index(parser->encoding.name));
|
||||
}
|
||||
|
||||
/**
|
||||
* Certain nodes can be compiled literally, which can lead to further
|
||||
* optimizations. These nodes will all have the PM_NODE_FLAG_STATIC_LITERAL flag
|
||||
|
@ -224,6 +270,14 @@ pm_static_literal_p(const pm_node_t *node)
|
|||
return node->flags & PM_NODE_FLAG_STATIC_LITERAL;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
pm_new_regex(pm_regular_expression_node_t * cast, const pm_parser_t * parser) {
|
||||
VALUE regex_str = parse_string(&cast->unescaped, parser);
|
||||
rb_encoding * enc = pm_reg_enc(cast, parser);
|
||||
|
||||
return rb_enc_reg_new(RSTRING_PTR(regex_str), RSTRING_LEN(regex_str), enc, pm_reg_flags((const pm_node_t *)cast));
|
||||
}
|
||||
|
||||
/**
|
||||
* Certain nodes can be compiled literally. This function returns the literal
|
||||
* value described by the given node. For example, an array node with all static
|
||||
|
@ -283,8 +337,7 @@ pm_static_literal_value(const pm_node_t *node, pm_scope_node_t *scope_node, pm_p
|
|||
case PM_REGULAR_EXPRESSION_NODE: {
|
||||
pm_regular_expression_node_t *cast = (pm_regular_expression_node_t *) node;
|
||||
|
||||
VALUE string = parse_string(&cast->unescaped, parser);
|
||||
return rb_reg_new(RSTRING_PTR(string), RSTRING_LEN(string), pm_reg_flags(node));
|
||||
return pm_new_regex(cast, parser);
|
||||
}
|
||||
case PM_SOURCE_ENCODING_NODE: {
|
||||
rb_encoding *encoding = rb_find_encoding(rb_str_new_cstr(scope_node->parser->encoding.name));
|
||||
|
@ -2797,8 +2850,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
if (!popped) {
|
||||
pm_regular_expression_node_t *cast = (pm_regular_expression_node_t *) node;
|
||||
|
||||
VALUE regex_str = parse_string(&cast->unescaped, parser);
|
||||
VALUE regex = rb_reg_new(RSTRING_PTR(regex_str), RSTRING_LEN(regex_str), pm_reg_flags(node));
|
||||
VALUE regex = pm_new_regex(cast, parser);
|
||||
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, regex);
|
||||
}
|
||||
|
|
|
@ -390,6 +390,14 @@ module Prism
|
|||
assert_prism_eval('/pit/mx')
|
||||
assert_prism_eval('/pit/xi')
|
||||
assert_prism_eval('/pit/ixm')
|
||||
|
||||
assert_prism_eval('/pit/u')
|
||||
assert_prism_eval('/pit/e')
|
||||
assert_prism_eval('/pit/s')
|
||||
assert_prism_eval('/pit/n')
|
||||
|
||||
assert_prism_eval('/pit/me')
|
||||
assert_prism_eval('/pit/ne')
|
||||
end
|
||||
|
||||
def test_StringConcatNode
|
||||
|
|
Загрузка…
Ссылка в новой задаче