зеркало из https://github.com/github/ruby.git
[PRISM] Setup encodings in prism compiler
This commit is contained in:
Родитель
fab7018346
Коммит
01787d53bf
|
@ -143,28 +143,30 @@ parse_imaginary(pm_imaginary_node_t *node)
|
|||
}
|
||||
|
||||
static inline VALUE
|
||||
parse_string(pm_string_t *string)
|
||||
parse_string(pm_string_t *string, pm_parser_t *parser)
|
||||
{
|
||||
return rb_str_new((const char *) pm_string_source(string), pm_string_length(string));
|
||||
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding.name));
|
||||
return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), enc);
|
||||
}
|
||||
|
||||
static inline ID
|
||||
parse_symbol(const uint8_t *start, const uint8_t *end)
|
||||
parse_symbol(const uint8_t *start, const uint8_t *end, pm_parser_t *parser)
|
||||
{
|
||||
return rb_intern2((const char *) start, end - start);
|
||||
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding.name));
|
||||
return rb_intern3((const char *) start, end - start, enc);
|
||||
}
|
||||
|
||||
static inline ID
|
||||
parse_string_symbol(pm_string_t *string)
|
||||
parse_string_symbol(pm_string_t *string, pm_parser_t *parser)
|
||||
{
|
||||
const uint8_t *start = pm_string_source(string);
|
||||
return parse_symbol(start, start + pm_string_length(string));
|
||||
return parse_symbol(start, start + pm_string_length(string), parser);
|
||||
}
|
||||
|
||||
static inline ID
|
||||
parse_location_symbol(pm_location_t *location)
|
||||
parse_location_symbol(pm_location_t *location, pm_parser_t *parser)
|
||||
{
|
||||
return parse_symbol(location->start, location->end);
|
||||
return parse_symbol(location->start, location->end, parser);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -213,7 +215,7 @@ pm_static_literal_p(const pm_node_t *node)
|
|||
* literal values can be compiled into a literal array.
|
||||
*/
|
||||
static inline VALUE
|
||||
pm_static_literal_value(const pm_node_t *node, pm_scope_node_t *scope_node)
|
||||
pm_static_literal_value(const pm_node_t *node, pm_scope_node_t *scope_node, pm_parser_t *parser)
|
||||
{
|
||||
// Every node that comes into this function should already be marked as
|
||||
// static literal. If it's not, then we have a bug somewhere.
|
||||
|
@ -226,7 +228,7 @@ pm_static_literal_value(const pm_node_t *node, pm_scope_node_t *scope_node)
|
|||
|
||||
VALUE value = rb_ary_hidden_new(elements->size);
|
||||
for (size_t index = 0; index < elements->size; index++) {
|
||||
rb_ary_push(value, pm_static_literal_value(elements->nodes[index], scope_node));
|
||||
rb_ary_push(value, pm_static_literal_value(elements->nodes[index], scope_node, parser));
|
||||
}
|
||||
|
||||
OBJ_FREEZE(value);
|
||||
|
@ -244,7 +246,7 @@ pm_static_literal_value(const pm_node_t *node, pm_scope_node_t *scope_node)
|
|||
for (size_t index = 0; index < elements->size; index++) {
|
||||
assert(PM_NODE_TYPE_P(elements->nodes[index], PM_ASSOC_NODE));
|
||||
pm_assoc_node_t *cast = (pm_assoc_node_t *) elements->nodes[index];
|
||||
VALUE pair[2] = { pm_static_literal_value(cast->key, scope_node), pm_static_literal_value(cast->value, scope_node) };
|
||||
VALUE pair[2] = { pm_static_literal_value(cast->key, scope_node, parser), pm_static_literal_value(cast->value, scope_node, parser) };
|
||||
rb_ary_cat(array, pair, 2);
|
||||
}
|
||||
|
||||
|
@ -266,7 +268,7 @@ pm_static_literal_value(const pm_node_t *node, pm_scope_node_t *scope_node)
|
|||
case PM_REGULAR_EXPRESSION_NODE: {
|
||||
pm_regular_expression_node_t *cast = (pm_regular_expression_node_t *) node;
|
||||
|
||||
VALUE string = parse_string(&cast->unescaped);
|
||||
VALUE string = parse_string(&cast->unescaped, parser);
|
||||
return rb_reg_new(RSTRING_PTR(string), RSTRING_LEN(string), pm_reg_flags(node));
|
||||
}
|
||||
case PM_SOURCE_ENCODING_NODE: {
|
||||
|
@ -276,14 +278,14 @@ pm_static_literal_value(const pm_node_t *node, pm_scope_node_t *scope_node)
|
|||
}
|
||||
case PM_SOURCE_FILE_NODE: {
|
||||
pm_source_file_node_t *cast = (pm_source_file_node_t *)node;
|
||||
return cast->filepath.length ? parse_string(&cast->filepath) : rb_fstring_lit("<compiled>");
|
||||
return cast->filepath.length ? parse_string(&cast->filepath, parser) : rb_fstring_lit("<compiled>");
|
||||
}
|
||||
case PM_SOURCE_LINE_NODE:
|
||||
return INT2FIX((int) pm_newline_list_line_column(&scope_node->parser->newline_list, node->location.start).line);
|
||||
case PM_STRING_NODE:
|
||||
return parse_string(&((pm_string_node_t *) node)->unescaped);
|
||||
return parse_string(&((pm_string_node_t *) node)->unescaped, parser);
|
||||
case PM_SYMBOL_NODE:
|
||||
return ID2SYM(parse_string_symbol(&((pm_symbol_node_t *) node)->unescaped));
|
||||
return ID2SYM(parse_string_symbol(&((pm_symbol_node_t *) node)->unescaped, parser));
|
||||
case PM_TRUE_NODE:
|
||||
return Qtrue;
|
||||
default:
|
||||
|
@ -512,7 +514,7 @@ pm_compile_while(rb_iseq_t *iseq, int lineno, pm_node_flags_t flags, enum pm_nod
|
|||
}
|
||||
|
||||
static void
|
||||
pm_interpolated_node_compile(pm_node_list_t parts, rb_iseq_t *iseq, NODE dummy_line_node, LINK_ANCHOR *const ret, const uint8_t *src, bool popped, pm_scope_node_t *scope_node)
|
||||
pm_interpolated_node_compile(pm_node_list_t parts, rb_iseq_t *iseq, NODE dummy_line_node, LINK_ANCHOR *const ret, const uint8_t *src, bool popped, pm_scope_node_t *scope_node, pm_parser_t *parser)
|
||||
{
|
||||
size_t parts_size = parts.size;
|
||||
|
||||
|
@ -522,7 +524,7 @@ pm_interpolated_node_compile(pm_node_list_t parts, rb_iseq_t *iseq, NODE dummy_l
|
|||
|
||||
if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
|
||||
pm_string_node_t *string_node = (pm_string_node_t *) part;
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, parse_string(&string_node->unescaped));
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, parse_string(&string_node->unescaped, parser));
|
||||
}
|
||||
else {
|
||||
PM_COMPILE_NOT_POPPED(part);
|
||||
|
@ -859,8 +861,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
|
||||
ADD_INSN1(ret, &dummy_line_node, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE));
|
||||
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, ID2SYM(parse_location_symbol(&alias_node->new_name->location)));
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, ID2SYM(parse_location_symbol(&alias_node->old_name->location)));
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, ID2SYM(parse_location_symbol(&alias_node->new_name->location, parser)));
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, ID2SYM(parse_location_symbol(&alias_node->old_name->location, parser)));
|
||||
|
||||
ADD_SEND(ret, &dummy_line_node, id_core_set_variable_alias, INT2FIX(2));
|
||||
|
||||
|
@ -911,7 +913,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
// is popped, then we know we don't need to do anything since it's
|
||||
// statically known.
|
||||
if (!popped) {
|
||||
VALUE value = pm_static_literal_value(node, scope_node);
|
||||
VALUE value = pm_static_literal_value(node, scope_node, parser);
|
||||
ADD_INSN1(ret, &dummy_line_node, duparray, value);
|
||||
RB_OBJ_WRITTEN(iseq, Qundef, value);
|
||||
}
|
||||
|
@ -1556,7 +1558,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
// is popped, then we know we don't need to do anything since it's
|
||||
// statically known.
|
||||
if (!popped) {
|
||||
VALUE value = pm_static_literal_value(node, scope_node);
|
||||
VALUE value = pm_static_literal_value(node, scope_node, parser);
|
||||
ADD_INSN1(ret, &dummy_line_node, duphash, value);
|
||||
RB_OBJ_WRITTEN(iseq, Qundef, value);
|
||||
}
|
||||
|
@ -1714,7 +1716,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
}
|
||||
case PM_INTERPOLATED_MATCH_LAST_LINE_NODE: {
|
||||
pm_interpolated_match_last_line_node_t *cast = (pm_interpolated_match_last_line_node_t *) node;
|
||||
pm_interpolated_node_compile(cast->parts, iseq, dummy_line_node, ret, src, popped, scope_node);
|
||||
pm_interpolated_node_compile(cast->parts, iseq, dummy_line_node, ret, src, popped, scope_node, parser);
|
||||
|
||||
ADD_INSN2(ret, &dummy_line_node, toregexp, INT2FIX(pm_reg_flags(node)), INT2FIX((int) (cast->parts.size)));
|
||||
|
||||
|
@ -1726,7 +1728,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
}
|
||||
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE: {
|
||||
pm_interpolated_regular_expression_node_t *cast = (pm_interpolated_regular_expression_node_t *) node;
|
||||
pm_interpolated_node_compile(cast->parts, iseq, dummy_line_node, ret, src, popped, scope_node);
|
||||
pm_interpolated_node_compile(cast->parts, iseq, dummy_line_node, ret, src, popped, scope_node, parser);
|
||||
|
||||
ADD_INSN2(ret, &dummy_line_node, toregexp, INT2FIX(pm_reg_flags(node)), INT2FIX((int) (cast->parts.size)));
|
||||
PM_POP_IF_POPPED;
|
||||
|
@ -1734,7 +1736,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
}
|
||||
case PM_INTERPOLATED_STRING_NODE: {
|
||||
pm_interpolated_string_node_t *interp_string_node = (pm_interpolated_string_node_t *) node;
|
||||
pm_interpolated_node_compile(interp_string_node->parts, iseq, dummy_line_node, ret, src, popped, scope_node);
|
||||
pm_interpolated_node_compile(interp_string_node->parts, iseq, dummy_line_node, ret, src, popped, scope_node, parser);
|
||||
|
||||
size_t parts_size = interp_string_node->parts.size;
|
||||
if (parts_size > 1) {
|
||||
|
@ -1746,7 +1748,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
}
|
||||
case PM_INTERPOLATED_SYMBOL_NODE: {
|
||||
pm_interpolated_symbol_node_t *interp_symbol_node = (pm_interpolated_symbol_node_t *) node;
|
||||
pm_interpolated_node_compile(interp_symbol_node->parts, iseq, dummy_line_node, ret, src, popped, scope_node);
|
||||
pm_interpolated_node_compile(interp_symbol_node->parts, iseq, dummy_line_node, ret, src, popped, scope_node, parser);
|
||||
|
||||
size_t parts_size = interp_symbol_node->parts.size;
|
||||
if (parts_size > 1) {
|
||||
|
@ -1765,7 +1767,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
case PM_INTERPOLATED_X_STRING_NODE: {
|
||||
pm_interpolated_x_string_node_t *interp_x_string_node = (pm_interpolated_x_string_node_t *) node;
|
||||
ADD_INSN(ret, &dummy_line_node, putself);
|
||||
pm_interpolated_node_compile(interp_x_string_node->parts, iseq, dummy_line_node, ret, src, false, scope_node);
|
||||
pm_interpolated_node_compile(interp_x_string_node->parts, iseq, dummy_line_node, ret, src, false, scope_node, parser);
|
||||
|
||||
size_t parts_size = interp_x_string_node->parts.size;
|
||||
if (parts_size > 1) {
|
||||
|
@ -1913,7 +1915,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
if (!popped) {
|
||||
pm_match_last_line_node_t *cast = (pm_match_last_line_node_t *) node;
|
||||
|
||||
VALUE regex_str = parse_string(&cast->unescaped);
|
||||
VALUE regex_str = parse_string(&cast->unescaped, parser);
|
||||
VALUE regex = rb_reg_new(RSTRING_PTR(regex_str), RSTRING_LEN(regex_str), pm_reg_flags(node));
|
||||
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, regex);
|
||||
|
@ -2214,7 +2216,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
if (!popped) {
|
||||
pm_regular_expression_node_t *cast = (pm_regular_expression_node_t *) node;
|
||||
|
||||
VALUE regex_str = parse_string(&cast->unescaped);
|
||||
VALUE regex_str = parse_string(&cast->unescaped, parser);
|
||||
VALUE regex = rb_reg_new(RSTRING_PTR(regex_str), RSTRING_LEN(regex_str), pm_reg_flags(node));
|
||||
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, regex);
|
||||
|
@ -2374,7 +2376,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
// reference the encoding object corresponding to the encoding of the
|
||||
// source file, and can be changed by a magic encoding comment.
|
||||
if (!popped) {
|
||||
VALUE value = pm_static_literal_value(node, scope_node);
|
||||
VALUE value = pm_static_literal_value(node, scope_node, parser);
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, value);
|
||||
RB_OBJ_WRITTEN(iseq, Qundef, value);
|
||||
}
|
||||
|
@ -2384,7 +2386,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
// Source file nodes are generated by the __FILE__ syntax. They
|
||||
// reference the file name of the source file.
|
||||
if (!popped) {
|
||||
VALUE value = pm_static_literal_value(node, scope_node);
|
||||
VALUE value = pm_static_literal_value(node, scope_node, parser);
|
||||
ADD_INSN1(ret, &dummy_line_node, putstring, value);
|
||||
RB_OBJ_WRITTEN(iseq, Qundef, value);
|
||||
}
|
||||
|
@ -2394,7 +2396,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
// Source line nodes are generated by the __LINE__ syntax. They
|
||||
// reference the line number where they occur in the source file.
|
||||
if (!popped) {
|
||||
VALUE value = pm_static_literal_value(node, scope_node);
|
||||
VALUE value = pm_static_literal_value(node, scope_node, parser);
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, value);
|
||||
RB_OBJ_WRITTEN(iseq, Qundef, value);
|
||||
}
|
||||
|
@ -2437,7 +2439,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
case PM_STRING_NODE: {
|
||||
if (!popped) {
|
||||
pm_string_node_t *string_node = (pm_string_node_t *) node;
|
||||
ADD_INSN1(ret, &dummy_line_node, putstring, parse_string(&string_node->unescaped));
|
||||
ADD_INSN1(ret, &dummy_line_node, putstring, parse_string(&string_node->unescaped, parser));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -2445,7 +2447,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
// Symbols nodes are symbol literals with no interpolation. They are
|
||||
// always marked as static literals.
|
||||
if (!popped) {
|
||||
VALUE value = pm_static_literal_value(node, scope_node);
|
||||
VALUE value = pm_static_literal_value(node, scope_node, parser);
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, value);
|
||||
RB_OBJ_WRITTEN(iseq, Qundef, value);
|
||||
}
|
||||
|
@ -2506,7 +2508,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
|
|||
case PM_X_STRING_NODE: {
|
||||
pm_x_string_node_t *xstring_node = (pm_x_string_node_t *) node;
|
||||
ADD_INSN(ret, &dummy_line_node, putself);
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, parse_string(&xstring_node->unescaped));
|
||||
ADD_INSN1(ret, &dummy_line_node, putobject, parse_string(&xstring_node->unescaped, parser));
|
||||
ADD_SEND_WITH_FLAG(ret, &dummy_line_node, idBackquote, INT2NUM(1), INT2FIX(VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE));
|
||||
|
||||
PM_POP_IF_POPPED;
|
||||
|
|
|
@ -655,8 +655,8 @@ module Prism
|
|||
assert_separately(%w[], "#{<<-'begin;'}\n#{<<-'end;'}")
|
||||
begin;
|
||||
def compare_eval(source)
|
||||
ruby_eval = RubyVM::InstructionSequence.compile(source).eval
|
||||
prism_eval = RubyVM::InstructionSequence.compile_prism(source).eval
|
||||
ruby_eval = RubyVM::InstructionSequence.compile("module A; " + source + "; end").eval
|
||||
prism_eval = RubyVM::InstructionSequence.compile_prism("module B; " + source + "; end").eval
|
||||
|
||||
assert_equal ruby_eval, prism_eval
|
||||
end
|
||||
|
@ -673,7 +673,7 @@ module Prism
|
|||
$VERBOSE = verbose_bak
|
||||
end
|
||||
end
|
||||
test_prism_eval("a = 1; tap do; { a: }; end")
|
||||
test_prism_eval("a = 1; 1.times do; { a: }; end")
|
||||
test_prism_eval("a = 1; def foo(a); a; end")
|
||||
end;
|
||||
end
|
||||
|
@ -686,6 +686,15 @@ module Prism
|
|||
# TODO
|
||||
end
|
||||
|
||||
############################################################################
|
||||
# Encoding #
|
||||
############################################################################
|
||||
|
||||
def test_encoding
|
||||
test_prism_eval('"però"')
|
||||
test_prism_eval(":però")
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def compare_eval(source)
|
||||
|
|
Загрузка…
Ссылка в новой задаче