ruby/ruby_parser.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1159 строки
27 KiB
C
Исходник Обычный вид История

/* This is a wrapper for parse.y */
#include "internal/parse.h"
#include "internal/re.h"
#include "internal/ruby_parser.h"
#include "node.h"
#include "rubyparser.h"
#include "internal/error.h"
#ifdef UNIVERSAL_PARSER
#include "internal.h"
#include "internal/array.h"
#include "internal/bignum.h"
#include "internal/compile.h"
#include "internal/complex.h"
#include "internal/encoding.h"
#include "internal/gc.h"
#include "internal/hash.h"
#include "internal/io.h"
#include "internal/rational.h"
#include "internal/re.h"
#include "internal/string.h"
#include "internal/symbol.h"
#include "internal/thread.h"
#include "ruby/ractor.h"
#include "ruby/ruby.h"
#include "ruby/util.h"
#include "internal.h"
#include "vm_core.h"
#include "symbol.h"
static int
is_ascii_string2(VALUE str)
{
return is_ascii_string(str);
}
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 6, 0)
static VALUE
syntax_error_append(VALUE exc, VALUE file, int line, int column,
void *enc, const char *fmt, va_list args)
{
return rb_syntax_error_append(exc, file, line, column, (rb_encoding *)enc, fmt, args);
}
static int
local_defined(ID id, const void *p)
{
return rb_local_defined(id, (const rb_iseq_t *)p);
}
static int
dvar_defined(ID id, const void *p)
{
return rb_dvar_defined(id, (const rb_iseq_t *)p);
}
static int
is_usascii_enc(void *enc)
{
return rb_is_usascii_enc((rb_encoding *)enc);
}
static int
is_local_id2(ID id)
{
return is_local_id(id);
}
static int
is_attrset_id2(ID id)
{
return is_attrset_id(id);
}
static int
is_notop_id2(ID id)
{
return is_notop_id(id);
}
static VALUE
enc_str_new(const char *ptr, long len, void *enc)
{
return rb_enc_str_new(ptr, len, (rb_encoding *)enc);
}
static int
enc_isalnum(OnigCodePoint c, void *enc)
{
return rb_enc_isalnum(c, (rb_encoding *)enc);
}
static int
enc_precise_mbclen(const char *p, const char *e, void *enc)
{
return rb_enc_precise_mbclen(p, e, (rb_encoding *)enc);
}
static int
mbclen_charfound_p(int len)
{
return MBCLEN_CHARFOUND_P(len);
}
static int
mbclen_charfound_len(int len)
{
return MBCLEN_CHARFOUND_LEN(len);
}
static const char *
enc_name(void *enc)
{
return rb_enc_name((rb_encoding *)enc);
}
static char *
enc_prev_char(const char *s, const char *p, const char *e, void *enc)
{
return rb_enc_prev_char(s, p, e, (rb_encoding *)enc);
}
static void *
enc_get(VALUE obj)
{
return (void *)rb_enc_get(obj);
}
static int
enc_asciicompat(void *enc)
{
return rb_enc_asciicompat((rb_encoding *)enc);
}
static void *
utf8_encoding(void)
{
return (void *)rb_utf8_encoding();
}
static VALUE
enc_associate(VALUE obj, void *enc)
{
return rb_enc_associate(obj, (rb_encoding *)enc);
}
static void *
ascii8bit_encoding(void)
{
return (void *)rb_ascii8bit_encoding();
}
static int
enc_codelen(int c, void *enc)
{
return rb_enc_codelen(c, (rb_encoding *)enc);
}
static int
enc_mbcput(unsigned int c, void *buf, void *enc)
{
return rb_enc_mbcput(c, buf, (rb_encoding *)enc);
}
static int
enc_mbclen(const char *p, const char *e, void *enc)
{
return rb_enc_mbclen(p, e, (rb_encoding *)enc);
}
static void *
enc_from_index(int idx)
{
return (void *)rb_enc_from_index(idx);
}
static int
enc_isspace(OnigCodePoint c, void *enc)
{
return rb_enc_isspace(c, (rb_encoding *)enc);
}
static ID
intern3(const char *name, long len, void *enc)
{
return rb_intern3(name, len, (rb_encoding *)enc);
}
static void *
usascii_encoding(void)
{
return (void *)rb_usascii_encoding();
}
static int
enc_symname_type(const char *name, long len, void *enc, unsigned int allowed_attrset)
{
return rb_enc_symname_type(name, len, (rb_encoding *)enc, allowed_attrset);
}
typedef struct {
struct parser_params *parser;
rb_encoding *enc;
NODE *succ_block;
const rb_code_location_t *loc;
} reg_named_capture_assign_t;
static int
reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end,
int back_num, int *back_refs, OnigRegex regex, void *arg0)
{
reg_named_capture_assign_t *arg = (reg_named_capture_assign_t*)arg0;
struct parser_params* p = arg->parser;
rb_encoding *enc = arg->enc;
const rb_code_location_t *loc = arg->loc;
long len = name_end - name;
const char *s = (const char *)name;
return rb_reg_named_capture_assign_iter_impl(p, s, len, (void *)enc, &arg->succ_block, loc);
}
static NODE *
reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_location_t *loc)
{
reg_named_capture_assign_t arg;
arg.parser = p;
arg.enc = rb_enc_get(regexp);
arg.succ_block = 0;
arg.loc = loc;
onig_foreach_name(RREGEXP_PTR(regexp), reg_named_capture_assign_iter, &arg);
if (!arg.succ_block) return 0;
return RNODE_BLOCK(arg.succ_block)->nd_next;
}
static int
rtest(VALUE obj)
{
return (int)RB_TEST(obj);
}
static int
nil_p(VALUE obj)
{
return (int)NIL_P(obj);
}
static VALUE
syntax_error_new(void)
{
return rb_class_new_instance(0, 0, rb_eSyntaxError);
}
static void *
memmove2(void *dest, const void *src, size_t t, size_t n)
{
return memmove(dest, src, rbimpl_size_mul_or_raise(t, n));
}
static void *
nonempty_memcpy(void *dest, const void *src, size_t t, size_t n)
{
return ruby_nonempty_memcpy(dest, src, rbimpl_size_mul_or_raise(t, n));
}
static VALUE
ruby_verbose2(void)
{
return ruby_verbose;
}
static int *
rb_errno_ptr2(void)
{
return rb_errno_ptr();
}
static void *
zalloc(size_t elemsiz)
{
return ruby_xcalloc(1, elemsiz);
}
static void
gc_guard(VALUE obj)
{
RB_GC_GUARD(obj);
}
2024-01-11 15:27:19 +03:00
static VALUE
arg_error(void)
{
return rb_eArgError;
}
static VALUE
static_id2sym(ID id)
{
return (((VALUE)(id)<<RUBY_SPECIAL_SHIFT)|SYMBOL_FLAG);
}
static long
str_coderange_scan_restartable(const char *s, const char *e, void *enc, int *cr)
{
return rb_str_coderange_scan_restartable(s, e, (rb_encoding *)enc, cr);
}
static int
enc_mbminlen(void *enc)
{
return rb_enc_mbminlen((rb_encoding *)enc);
}
static bool
enc_isascii(OnigCodePoint c, void *enc)
{
return rb_enc_isascii(c, (rb_encoding *)enc);
}
static OnigCodePoint
enc_mbc_to_codepoint(const char *p, const char *e, void *enc)
{
const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
return ONIGENC_MBC_TO_CODE((rb_encoding *)enc, up, ue);
}
2024-01-11 15:27:19 +03:00
extern VALUE rb_eArgError;
2024-01-14 11:55:11 +03:00
static const rb_parser_config_t rb_global_parser_config = {
2024-01-11 15:27:19 +03:00
.malloc = ruby_xmalloc,
.calloc = ruby_xcalloc,
.realloc = ruby_xrealloc,
.free = ruby_xfree,
.alloc_n = ruby_xmalloc2,
.alloc = ruby_xmalloc,
.realloc_n = ruby_xrealloc2,
.zalloc = zalloc,
.rb_memmove = memmove2,
.nonempty_memcpy = nonempty_memcpy,
.xmalloc_mul_add = rb_xmalloc_mul_add,
.compile_callback = rb_suppress_tracing,
.reg_named_capture_assign = reg_named_capture_assign,
.attr_get = rb_attr_get,
.ary_new = rb_ary_new,
.ary_push = rb_ary_push,
.ary_new_from_args = rb_ary_new_from_args,
.ary_unshift = rb_ary_unshift,
.make_temporary_id = rb_make_temporary_id,
.is_local_id = is_local_id2,
.is_attrset_id = is_attrset_id2,
.is_global_name_punct = is_global_name_punct,
.id_type = id_type,
.id_attrset = rb_id_attrset,
.intern = rb_intern,
.intern2 = rb_intern2,
.intern3 = intern3,
.intern_str = rb_intern_str,
.is_notop_id = is_notop_id2,
.enc_symname_type = enc_symname_type,
.id2name = rb_id2name,
.id2str = rb_id2str,
.id2sym = rb_id2sym,
.sym2id = rb_sym2id,
.str_catf = rb_str_catf,
.str_cat_cstr = rb_str_cat_cstr,
.str_modify = rb_str_modify,
.str_set_len = rb_str_set_len,
.str_cat = rb_str_cat,
.str_resize = rb_str_resize,
.str_new = rb_str_new,
.str_new_cstr = rb_str_new_cstr,
.str_to_interned_str = rb_str_to_interned_str,
2024-01-11 15:27:19 +03:00
.is_ascii_string = is_ascii_string2,
.enc_str_new = enc_str_new,
.str_vcatf = rb_str_vcatf,
.string_value_cstr = rb_string_value_cstr,
.rb_sprintf = rb_sprintf,
.rstring_ptr = RSTRING_PTR,
.rstring_end = RSTRING_END,
.rstring_len = RSTRING_LEN,
.obj_as_string = rb_obj_as_string,
.int2num = rb_int2num_inline,
.stderr_tty_p = rb_stderr_tty_p,
.write_error_str = rb_write_error_str,
.io_write = rb_io_write,
.io_flush = rb_io_flush,
.io_puts = rb_io_puts,
.debug_output_stdout = rb_ractor_stdout,
.debug_output_stderr = rb_ractor_stderr,
.is_usascii_enc = is_usascii_enc,
.enc_isalnum = enc_isalnum,
.enc_precise_mbclen = enc_precise_mbclen,
.mbclen_charfound_p = mbclen_charfound_p,
.mbclen_charfound_len = mbclen_charfound_len,
2024-01-11 15:27:19 +03:00
.enc_name = enc_name,
.enc_prev_char = enc_prev_char,
.enc_get = enc_get,
.enc_asciicompat = enc_asciicompat,
.utf8_encoding = utf8_encoding,
.enc_associate = enc_associate,
.ascii8bit_encoding = ascii8bit_encoding,
.enc_codelen = enc_codelen,
.enc_mbcput = enc_mbcput,
.enc_mbclen = enc_mbclen,
2024-01-11 15:27:19 +03:00
.enc_find_index = rb_enc_find_index,
.enc_from_index = enc_from_index,
.enc_isspace = enc_isspace,
.enc_coderange_7bit = ENC_CODERANGE_7BIT,
.enc_coderange_unknown = ENC_CODERANGE_UNKNOWN,
.usascii_encoding = usascii_encoding,
.enc_coderange_broken = ENC_CODERANGE_BROKEN,
.enc_mbminlen = enc_mbminlen,
.enc_isascii = enc_isascii,
.enc_mbc_to_codepoint = enc_mbc_to_codepoint,
2024-01-11 15:27:19 +03:00
.local_defined = local_defined,
.dvar_defined = dvar_defined,
.syntax_error_append = syntax_error_append,
.raise = rb_raise,
.syntax_error_new = syntax_error_new,
.errinfo = rb_errinfo,
.set_errinfo = rb_set_errinfo,
.exc_raise = rb_exc_raise,
.make_exception = rb_make_exception,
.sized_xfree = ruby_sized_xfree,
.sized_realloc_n = ruby_sized_realloc_n,
.gc_guard = gc_guard,
.gc_mark = rb_gc_mark,
.reg_compile = rb_reg_compile,
.reg_check_preprocess = rb_reg_check_preprocess,
.memcicmp = rb_memcicmp,
.compile_warn = rb_compile_warn,
.compile_warning = rb_compile_warning,
.bug = rb_bug,
.fatal = rb_fatal,
.verbose = ruby_verbose2,
.errno_ptr = rb_errno_ptr2,
.make_backtrace = rb_make_backtrace,
.scan_hex = ruby_scan_hex,
.scan_oct = ruby_scan_oct,
.scan_digits = ruby_scan_digits,
.strtod = ruby_strtod,
.rtest = rtest,
.nil_p = nil_p,
.qnil = Qnil,
.qfalse = Qfalse,
.eArgError = arg_error,
.long2int = rb_long2int,
/* For Ripper */
2024-01-11 15:27:19 +03:00
.static_id2sym = static_id2sym,
.str_coderange_scan_restartable = str_coderange_scan_restartable,
};
#endif
2024-01-11 15:27:19 +03:00
enum lex_type {
lex_type_str,
lex_type_io,
lex_type_array,
lex_type_generic,
};
struct ruby_parser {
rb_parser_t *parser_params;
enum lex_type type;
union {
struct lex_pointer_string lex_str;
struct {
VALUE file;
} lex_io;
struct {
VALUE ary;
} lex_array;
} data;
};
static void
parser_mark(void *ptr)
{
struct ruby_parser *parser = (struct ruby_parser*)ptr;
rb_ruby_parser_mark(parser->parser_params);
switch (parser->type) {
case lex_type_str:
rb_gc_mark(parser->data.lex_str.str);
break;
case lex_type_io:
rb_gc_mark(parser->data.lex_io.file);
break;
case lex_type_array:
rb_gc_mark(parser->data.lex_array.ary);
break;
case lex_type_generic:
/* noop. Caller of rb_parser_compile_generic should mark the objects. */
break;
}
}
static void
parser_free(void *ptr)
{
struct ruby_parser *parser = (struct ruby_parser*)ptr;
rb_ruby_parser_free(parser->parser_params);
xfree(parser);
}
static size_t
parser_memsize(const void *ptr)
{
struct ruby_parser *parser = (struct ruby_parser*)ptr;
return rb_ruby_parser_memsize(parser->parser_params);
}
static const rb_data_type_t ruby_parser_data_type = {
"parser",
{
parser_mark,
parser_free,
parser_memsize,
},
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
#ifdef UNIVERSAL_PARSER
const rb_parser_config_t *
rb_ruby_parser_config(void)
{
return &rb_global_parser_config;
}
2024-01-11 15:27:19 +03:00
rb_parser_t *
rb_parser_params_new(void)
{
return rb_ruby_parser_new(&rb_global_parser_config);
}
#else
rb_parser_t *
rb_parser_params_new(void)
{
return rb_ruby_parser_new();
}
#endif /* UNIVERSAL_PARSER */
VALUE
rb_parser_new(void)
{
struct ruby_parser *parser;
rb_parser_t *parser_params;
/*
* Create parser_params ahead of vparser because
* rb_ruby_parser_new can run GC so if create vparser
* first, parser_mark tries to mark not initialized parser_params.
*/
2024-01-11 15:27:19 +03:00
parser_params = rb_parser_params_new();
VALUE vparser = TypedData_Make_Struct(0, struct ruby_parser,
&ruby_parser_data_type, parser);
parser->parser_params = parser_params;
return vparser;
}
void
rb_parser_set_options(VALUE vparser, int print, int loop, int chomp, int split)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
rb_ruby_parser_set_options(parser->parser_params, print, loop, chomp, split);
}
VALUE
rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
rb_ruby_parser_set_context(parser->parser_params, base, main);
return vparser;
}
void
[Universal parser] DeVALUE of p->debug_lines and ast->body.script_lines This patch is part of universal parser work. ## Summary - Decouple VALUE from members below: - `(struct parser_params *)->debug_lines` - `(rb_ast_t *)->body.script_lines` - Instead, they are now `rb_parser_ary_t *` - They can also be a `(VALUE)FIXNUM` as before to hold line count - `ISEQ_BODY(iseq)->variable.script_lines` remains VALUE - In order to do this, - Add `VALUE script_lines` param to `rb_iseq_new_with_opt()` - Introduce `rb_parser_build_script_lines_from()` to convert `rb_parser_ary_t *` into `VALUE` ## Other details - Extend `rb_parser_ary_t *`. It previously could only store `rb_parser_ast_token *`, now can store script_lines, too - Change tactics of building the top-level `SCRIPT_LINES__` in `yycompile0()` - Before: While parsing, each line of the script is added to `SCRIPT_LINES__[path]` - After: After `yyparse(p)`, `SCRIPT_LINES__[path]` will be built from `p->debug_lines` - Remove the second parameter of `rb_parser_set_script_lines()` to make it simple - Introduce `script_lines_free()` to be called from `rb_ast_free()` because the GC no longer takes care of the script_lines - Introduce `rb_parser_string_deep_copy()` in parse.y to maintain script_lines when `rb_ruby_parser_free()` called - With regard to this, please see *Future tasks* below ## Future tasks - Decouple IMEMO from `rb_ast_t *` - This lifts the five-members-restriction of Ruby object, - So we will be able to move the ownership of the `lex.string_buffer` from parser to AST - Then we remove `rb_parser_string_deep_copy()` to make the whole thing simple
2024-03-28 04:26:42 +03:00
rb_parser_set_script_lines(VALUE vparser)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
[Universal parser] DeVALUE of p->debug_lines and ast->body.script_lines This patch is part of universal parser work. ## Summary - Decouple VALUE from members below: - `(struct parser_params *)->debug_lines` - `(rb_ast_t *)->body.script_lines` - Instead, they are now `rb_parser_ary_t *` - They can also be a `(VALUE)FIXNUM` as before to hold line count - `ISEQ_BODY(iseq)->variable.script_lines` remains VALUE - In order to do this, - Add `VALUE script_lines` param to `rb_iseq_new_with_opt()` - Introduce `rb_parser_build_script_lines_from()` to convert `rb_parser_ary_t *` into `VALUE` ## Other details - Extend `rb_parser_ary_t *`. It previously could only store `rb_parser_ast_token *`, now can store script_lines, too - Change tactics of building the top-level `SCRIPT_LINES__` in `yycompile0()` - Before: While parsing, each line of the script is added to `SCRIPT_LINES__[path]` - After: After `yyparse(p)`, `SCRIPT_LINES__[path]` will be built from `p->debug_lines` - Remove the second parameter of `rb_parser_set_script_lines()` to make it simple - Introduce `script_lines_free()` to be called from `rb_ast_free()` because the GC no longer takes care of the script_lines - Introduce `rb_parser_string_deep_copy()` in parse.y to maintain script_lines when `rb_ruby_parser_free()` called - With regard to this, please see *Future tasks* below ## Future tasks - Decouple IMEMO from `rb_ast_t *` - This lifts the five-members-restriction of Ruby object, - So we will be able to move the ownership of the `lex.string_buffer` from parser to AST - Then we remove `rb_parser_string_deep_copy()` to make the whole thing simple
2024-03-28 04:26:42 +03:00
rb_ruby_parser_set_script_lines(parser->parser_params);
}
void
rb_parser_error_tolerant(VALUE vparser)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
rb_ruby_parser_error_tolerant(parser->parser_params);
}
void
rb_parser_keep_tokens(VALUE vparser)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
rb_ruby_parser_keep_tokens(parser->parser_params);
}
VALUE
rb_parser_lex_get_str(struct lex_pointer_string *ptr_str)
{
char *beg, *end, *start;
long len;
VALUE s = ptr_str->str;
beg = RSTRING_PTR(s);
len = RSTRING_LEN(s);
start = beg;
if (ptr_str->ptr) {
if (len == ptr_str->ptr) return Qnil;
beg += ptr_str->ptr;
len -= ptr_str->ptr;
}
end = memchr(beg, '\n', len);
if (end) len = ++end - beg;
ptr_str->ptr += len;
return rb_str_subseq(s, beg - start, len);
}
static VALUE
lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count)
{
return rb_parser_lex_get_str((struct lex_pointer_string *)input);
}
static void parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines);
static rb_ast_t*
parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line)
{
rb_ast_t *ast;
const char *ptr = 0;
long len = 0;
rb_encoding *enc = 0;
if (!NIL_P(fname)) {
StringValueCStr(fname);
ptr = RSTRING_PTR(fname);
len = RSTRING_LEN(fname);
enc = rb_enc_get(fname);
}
ast = rb_parser_compile(p, gets, ptr, len, enc, input, line);
parser_aset_script_lines_for(fname, ast->body.script_lines);
return ast;
}
static rb_ast_t*
parser_compile_string0(struct ruby_parser *parser, VALUE fname, VALUE s, int line)
{
VALUE str = rb_str_new_frozen(s);
parser->type = lex_type_str;
parser->data.lex_str.str = str;
parser->data.lex_str.ptr = 0;
return parser_compile(parser->parser_params, lex_get_str, fname, (rb_parser_input_data)&parser->data, line);
}
static rb_encoding *
must_be_ascii_compatible(VALUE s)
{
rb_encoding *enc = rb_enc_get(s);
if (!rb_enc_asciicompat(enc)) {
rb_raise(rb_eArgError, "invalid source encoding");
}
return enc;
}
static rb_ast_t*
parser_compile_string_path(struct ruby_parser *parser, VALUE f, VALUE s, int line)
{
must_be_ascii_compatible(s);
return parser_compile_string0(parser, f, s, line);
}
static rb_ast_t*
parser_compile_string(struct ruby_parser *parser, const char *f, VALUE s, int line)
{
return parser_compile_string_path(parser, rb_filesystem_str_new_cstr(f), s, line);
}
VALUE rb_io_gets_internal(VALUE io);
static VALUE
lex_io_gets(struct parser_params *p, rb_parser_input_data input, int line_count)
{
VALUE io = (VALUE)input;
return rb_io_gets_internal(io);
}
static VALUE
lex_gets_array(struct parser_params *p, rb_parser_input_data data, int index)
{
VALUE array = (VALUE)data;
VALUE str = rb_ary_entry(array, index);
if (!NIL_P(str)) {
StringValue(str);
if (!rb_enc_asciicompat(rb_enc_get(str))) {
rb_raise(rb_eArgError, "invalid source encoding");
}
}
return str;
}
static rb_ast_t*
parser_compile_file_path(struct ruby_parser *parser, VALUE fname, VALUE file, int start)
{
parser->type = lex_type_io;
parser->data.lex_io.file = file;
return parser_compile(parser->parser_params, lex_io_gets, fname, (rb_parser_input_data)file, start);
}
static rb_ast_t*
parser_compile_array(struct ruby_parser *parser, VALUE fname, VALUE array, int start)
{
parser->type = lex_type_array;
parser->data.lex_array.ary = array;
return parser_compile(parser->parser_params, lex_gets_array, fname, (rb_parser_input_data)array, start);
}
static rb_ast_t*
parser_compile_generic(struct ruby_parser *parser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
{
parser->type = lex_type_generic;
return parser_compile(parser->parser_params, lex_gets, fname, (rb_parser_input_data)input, start);
}
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
static void
ast_free(void *ptr)
{
rb_ast_t *ast = (rb_ast_t *)ptr;
rb_ast_free(ast);
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
}
static const rb_data_type_t ast_data_type = {
"AST",
{
NULL,
ast_free,
NULL, // No dsize() because this object does not appear in ObjectSpace.
},
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
static VALUE
ast_alloc(rb_ast_t *ast)
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
{
VALUE vast = TypedData_Wrap_Struct(0, &ast_data_type, ast);
#ifdef UNIVERSAL_PARSER
ast = (rb_ast_t *)DATA_PTR(vast);
ast->config = &rb_global_parser_config;
#endif
return vast;
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
}
VALUE
rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
VALUE vast = ast_alloc(parser_compile_file_path(parser, fname, file, start));
RB_GC_GUARD(vparser);
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
return vast;
}
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
VALUE
rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
VALUE vast = ast_alloc(parser_compile_array(parser, fname, array, start));
RB_GC_GUARD(vparser);
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
return vast;
}
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
VALUE
rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
VALUE vast = ast_alloc(parser_compile_generic(parser, lex_gets, fname, input, start));
RB_GC_GUARD(vparser);
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
return vast;
}
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
VALUE
rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
VALUE vast = ast_alloc(parser_compile_string(parser, f, s, line));
RB_GC_GUARD(vparser);
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
return vast;
}
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
VALUE
rb_parser_compile_string_path(VALUE vparser, VALUE f, VALUE s, int line)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
VALUE vast = ast_alloc(parser_compile_string_path(parser, f, s, line));
RB_GC_GUARD(vparser);
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
return vast;
}
VALUE
rb_parser_encoding(VALUE vparser)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
return rb_enc_from_encoding(rb_ruby_parser_encoding(parser->parser_params));
}
VALUE
rb_parser_end_seen_p(VALUE vparser)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
return RBOOL(rb_ruby_parser_end_seen_p(parser->parser_params));
}
VALUE
rb_parser_set_yydebug(VALUE vparser, VALUE flag)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
rb_ruby_parser_set_yydebug(parser->parser_params, RTEST(flag));
return flag;
}
[Universal parser] DeVALUE of p->debug_lines and ast->body.script_lines This patch is part of universal parser work. ## Summary - Decouple VALUE from members below: - `(struct parser_params *)->debug_lines` - `(rb_ast_t *)->body.script_lines` - Instead, they are now `rb_parser_ary_t *` - They can also be a `(VALUE)FIXNUM` as before to hold line count - `ISEQ_BODY(iseq)->variable.script_lines` remains VALUE - In order to do this, - Add `VALUE script_lines` param to `rb_iseq_new_with_opt()` - Introduce `rb_parser_build_script_lines_from()` to convert `rb_parser_ary_t *` into `VALUE` ## Other details - Extend `rb_parser_ary_t *`. It previously could only store `rb_parser_ast_token *`, now can store script_lines, too - Change tactics of building the top-level `SCRIPT_LINES__` in `yycompile0()` - Before: While parsing, each line of the script is added to `SCRIPT_LINES__[path]` - After: After `yyparse(p)`, `SCRIPT_LINES__[path]` will be built from `p->debug_lines` - Remove the second parameter of `rb_parser_set_script_lines()` to make it simple - Introduce `script_lines_free()` to be called from `rb_ast_free()` because the GC no longer takes care of the script_lines - Introduce `rb_parser_string_deep_copy()` in parse.y to maintain script_lines when `rb_ruby_parser_free()` called - With regard to this, please see *Future tasks* below ## Future tasks - Decouple IMEMO from `rb_ast_t *` - This lifts the five-members-restriction of Ruby object, - So we will be able to move the ownership of the `lex.string_buffer` from parser to AST - Then we remove `rb_parser_string_deep_copy()` to make the whole thing simple
2024-03-28 04:26:42 +03:00
void
rb_set_script_lines_for(VALUE vparser, VALUE path)
{
struct ruby_parser *parser;
VALUE hash;
ID script_lines;
CONST_ID(script_lines, "SCRIPT_LINES__");
if (!rb_const_defined_at(rb_cObject, script_lines)) return;
hash = rb_const_get_at(rb_cObject, script_lines);
if (RB_TYPE_P(hash, T_HASH)) {
rb_hash_aset(hash, path, Qtrue);
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
rb_ruby_parser_set_script_lines(parser->parser_params);
}
}
[Universal parser] DeVALUE of p->debug_lines and ast->body.script_lines This patch is part of universal parser work. ## Summary - Decouple VALUE from members below: - `(struct parser_params *)->debug_lines` - `(rb_ast_t *)->body.script_lines` - Instead, they are now `rb_parser_ary_t *` - They can also be a `(VALUE)FIXNUM` as before to hold line count - `ISEQ_BODY(iseq)->variable.script_lines` remains VALUE - In order to do this, - Add `VALUE script_lines` param to `rb_iseq_new_with_opt()` - Introduce `rb_parser_build_script_lines_from()` to convert `rb_parser_ary_t *` into `VALUE` ## Other details - Extend `rb_parser_ary_t *`. It previously could only store `rb_parser_ast_token *`, now can store script_lines, too - Change tactics of building the top-level `SCRIPT_LINES__` in `yycompile0()` - Before: While parsing, each line of the script is added to `SCRIPT_LINES__[path]` - After: After `yyparse(p)`, `SCRIPT_LINES__[path]` will be built from `p->debug_lines` - Remove the second parameter of `rb_parser_set_script_lines()` to make it simple - Introduce `script_lines_free()` to be called from `rb_ast_free()` because the GC no longer takes care of the script_lines - Introduce `rb_parser_string_deep_copy()` in parse.y to maintain script_lines when `rb_ruby_parser_free()` called - With regard to this, please see *Future tasks* below ## Future tasks - Decouple IMEMO from `rb_ast_t *` - This lifts the five-members-restriction of Ruby object, - So we will be able to move the ownership of the `lex.string_buffer` from parser to AST - Then we remove `rb_parser_string_deep_copy()` to make the whole thing simple
2024-03-28 04:26:42 +03:00
VALUE
rb_parser_build_script_lines_from(rb_parser_ary_t *lines)
{
int i;
if (!lines) return Qnil;
[Universal parser] DeVALUE of p->debug_lines and ast->body.script_lines This patch is part of universal parser work. ## Summary - Decouple VALUE from members below: - `(struct parser_params *)->debug_lines` - `(rb_ast_t *)->body.script_lines` - Instead, they are now `rb_parser_ary_t *` - They can also be a `(VALUE)FIXNUM` as before to hold line count - `ISEQ_BODY(iseq)->variable.script_lines` remains VALUE - In order to do this, - Add `VALUE script_lines` param to `rb_iseq_new_with_opt()` - Introduce `rb_parser_build_script_lines_from()` to convert `rb_parser_ary_t *` into `VALUE` ## Other details - Extend `rb_parser_ary_t *`. It previously could only store `rb_parser_ast_token *`, now can store script_lines, too - Change tactics of building the top-level `SCRIPT_LINES__` in `yycompile0()` - Before: While parsing, each line of the script is added to `SCRIPT_LINES__[path]` - After: After `yyparse(p)`, `SCRIPT_LINES__[path]` will be built from `p->debug_lines` - Remove the second parameter of `rb_parser_set_script_lines()` to make it simple - Introduce `script_lines_free()` to be called from `rb_ast_free()` because the GC no longer takes care of the script_lines - Introduce `rb_parser_string_deep_copy()` in parse.y to maintain script_lines when `rb_ruby_parser_free()` called - With regard to this, please see *Future tasks* below ## Future tasks - Decouple IMEMO from `rb_ast_t *` - This lifts the five-members-restriction of Ruby object, - So we will be able to move the ownership of the `lex.string_buffer` from parser to AST - Then we remove `rb_parser_string_deep_copy()` to make the whole thing simple
2024-03-28 04:26:42 +03:00
if (lines->data_type != PARSER_ARY_DATA_SCRIPT_LINE) {
rb_bug("unexpected rb_parser_ary_data_type (%d) for script lines", lines->data_type);
}
VALUE script_lines = rb_ary_new_capa(lines->len);
for (i = 0; i < lines->len; i++) {
rb_parser_string_t *str = (rb_parser_string_t *)lines->data[i];
rb_ary_push(script_lines, rb_enc_str_new(str->ptr, str->len, str->enc));
}
return script_lines;
}
VALUE
rb_str_new_parser_string(rb_parser_string_t *str)
{
VALUE string = rb_enc_interned_str(str->ptr, str->len, str->enc);
rb_enc_str_coderange(string);
return string;
}
VALUE
rb_str_new_mutable_parser_string(rb_parser_string_t *str)
{
return rb_enc_str_new(str->ptr, str->len, str->enc);
}
static VALUE
negative_numeric(VALUE val)
{
if (FIXNUM_P(val)) {
return LONG2FIX(-FIX2LONG(val));
}
if (SPECIAL_CONST_P(val)) {
#if USE_FLONUM
if (FLONUM_P(val)) {
return DBL2NUM(-RFLOAT_VALUE(val));
}
#endif
goto unknown;
}
switch (BUILTIN_TYPE(val)) {
case T_BIGNUM:
BIGNUM_NEGATE(val);
val = rb_big_norm(val);
break;
case T_RATIONAL:
RATIONAL_SET_NUM(val, negative_numeric(RRATIONAL(val)->num));
break;
case T_COMPLEX:
RCOMPLEX_SET_REAL(val, negative_numeric(RCOMPLEX(val)->real));
RCOMPLEX_SET_IMAG(val, negative_numeric(RCOMPLEX(val)->imag));
break;
case T_FLOAT:
val = DBL2NUM(-RFLOAT_VALUE(val));
break;
unknown:
default:
rb_bug("unknown literal type (%s) passed to negative_numeric",
rb_builtin_class_name(val));
break;
}
return val;
}
static VALUE
integer_value(const char *val, int base)
{
return rb_cstr_to_inum(val, base, FALSE);
}
static VALUE
rational_value(const char *node_val, int base, int seen_point)
{
VALUE lit;
char* val = strdup(node_val);
if (seen_point > 0) {
int len = (int)(strlen(val));
char *point = &val[seen_point];
size_t fraclen = len-seen_point-1;
memmove(point, point+1, fraclen+1);
lit = rb_rational_new(integer_value(val, base), rb_int_positive_pow(10, fraclen));
}
else {
lit = rb_rational_raw1(integer_value(val, base));
}
free(val);
return lit;
}
VALUE
rb_node_integer_literal_val(const NODE *n)
{
const rb_node_integer_t *node = RNODE_INTEGER(n);
VALUE val = integer_value(node->val, node->base);
if (node->minus) {
val = negative_numeric(val);
}
return val;
}
VALUE
rb_node_float_literal_val(const NODE *n)
{
const rb_node_float_t *node = RNODE_FLOAT(n);
double d = strtod(node->val, 0);
if (node->minus) {
d = -d;
}
VALUE val = DBL2NUM(d);
return val;
}
VALUE
rb_node_rational_literal_val(const NODE *n)
{
VALUE lit;
const rb_node_rational_t *node = RNODE_RATIONAL(n);
lit = rational_value(node->val, node->base, node->seen_point);
if (node->minus) {
lit = negative_numeric(lit);
}
return lit;
}
VALUE
rb_node_imaginary_literal_val(const NODE *n)
{
VALUE lit;
const rb_node_imaginary_t *node = RNODE_IMAGINARY(n);
enum rb_numeric_type type = node->type;
switch (type) {
case integer_literal:
lit = integer_value(node->val, node->base);
break;
case float_literal:{
double d = strtod(node->val, 0);
lit = DBL2NUM(d);
break;
}
case rational_literal:
lit = rational_value(node->val, node->base, node->seen_point);
break;
default:
rb_bug("unreachable");
}
lit = rb_complex_raw(INT2FIX(0), lit);
if (node->minus) {
lit = negative_numeric(lit);
}
return lit;
}
VALUE
rb_node_str_string_val(const NODE *node)
{
rb_parser_string_t *str = RNODE_STR(node)->string;
return rb_str_new_parser_string(str);
}
VALUE
rb_node_sym_string_val(const NODE *node)
{
rb_parser_string_t *str = RNODE_SYM(node)->string;
return ID2SYM(rb_intern3(str->ptr, str->len, str->enc));
}
VALUE
rb_node_dstr_string_val(const NODE *node)
{
rb_parser_string_t *str = RNODE_DSTR(node)->string;
return str ? rb_str_new_parser_string(str) : Qnil;
}
VALUE
rb_node_dregx_string_val(const NODE *node)
{
rb_parser_string_t *str = RNODE_DREGX(node)->string;
return rb_str_new_parser_string(str);
}
VALUE
rb_node_regx_string_val(const NODE *node)
{
rb_node_regx_t *node_reg = RNODE_REGX(node);
rb_parser_string_t *string = node_reg->string;
VALUE str = rb_enc_str_new(string->ptr, string->len, string->enc);
return rb_reg_compile(str, node_reg->options, NULL, 0);
}
VALUE
rb_node_line_lineno_val(const NODE *node)
{
return INT2FIX(node->nd_loc.beg_pos.lineno);
}
VALUE
rb_node_file_path_val(const NODE *node)
{
return rb_str_new_parser_string(RNODE_FILE(node)->path);
}
VALUE
rb_node_encoding_val(const NODE *node)
{
return rb_enc_from_encoding(RNODE_ENCODING(node)->enc);
}
Separate SCRIPT_LINES__ from ast.c This patch suggests relocating the code dealing with `SCRIPT_LINES__` from ast.c to ruby_parser.c. ## Background - I guess `AbstractSyntaxTree.of` method used to use `SCRIPT_LINES__` internally for some reason before - However, now it appears `SCRIPT_LINES__` is no longer used meaningfully by the method - As evidence of this, (and as my patch shows,) removing the function call of `rb_script_lines_for()` from `ast_s_of()` does not affect the result of `test/ruby/test_ast.rb` Given the above, I think two possibilities can be considered: - (A) `AbstractSyntaxTree.of` has not needed `SCRIPT_LINES__` already (I pick this) - (B) We lack a test case of `AbstractSyntaxTree.of` that needs to use `SCRIPT_LINES__` ## Besides, The current implementation causes strange behavior: ```console ruby -e"SCRIPT_LINES__ = {__FILE__ => []}; puts RubyVM::AbstractSyntaxTree.of(->{ 1 + 2 }, keep_script_lines: true).script_lines" => `-e:1:in '<main>': undefined method 'script_lines' for nil (NoMethodError)` ``` I think this is a bug because `AbstractSyntaxTree.of` is not supposed to return `nil` even in this case. This happens due to the ast.c's dependence on `SCRIPT_LINES__`. And at the end of the `ast_s_of()`, `node_find()` can not find the target child node obviously because it doesn't make sense to look for a corresponding node made from the parameter of `AbstractSyntaxTree.of` in the AST tree made from the value of `{__FILE__ => []}` ## Solution Since I think it's good enough `SCRIPT_LINES__` to be only referred by ruby.c, I chose the possibility "(A)" and wrote this patch which moves `rb_script_lines_for()` from ast.c to ruby_parser.c. So as the result: - `ast_s_of()` function no longer look up `SCRIPT_LINES__` - Even so, this patched code passes the existing tests - The strange behavior above no longer happens (I also added a test for it) Please correct me if I miss something🙏
2024-03-29 10:18:14 +03:00
static void
parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines)
[Universal parser] DeVALUE of p->debug_lines and ast->body.script_lines This patch is part of universal parser work. ## Summary - Decouple VALUE from members below: - `(struct parser_params *)->debug_lines` - `(rb_ast_t *)->body.script_lines` - Instead, they are now `rb_parser_ary_t *` - They can also be a `(VALUE)FIXNUM` as before to hold line count - `ISEQ_BODY(iseq)->variable.script_lines` remains VALUE - In order to do this, - Add `VALUE script_lines` param to `rb_iseq_new_with_opt()` - Introduce `rb_parser_build_script_lines_from()` to convert `rb_parser_ary_t *` into `VALUE` ## Other details - Extend `rb_parser_ary_t *`. It previously could only store `rb_parser_ast_token *`, now can store script_lines, too - Change tactics of building the top-level `SCRIPT_LINES__` in `yycompile0()` - Before: While parsing, each line of the script is added to `SCRIPT_LINES__[path]` - After: After `yyparse(p)`, `SCRIPT_LINES__[path]` will be built from `p->debug_lines` - Remove the second parameter of `rb_parser_set_script_lines()` to make it simple - Introduce `script_lines_free()` to be called from `rb_ast_free()` because the GC no longer takes care of the script_lines - Introduce `rb_parser_string_deep_copy()` in parse.y to maintain script_lines when `rb_ruby_parser_free()` called - With regard to this, please see *Future tasks* below ## Future tasks - Decouple IMEMO from `rb_ast_t *` - This lifts the five-members-restriction of Ruby object, - So we will be able to move the ownership of the `lex.string_buffer` from parser to AST - Then we remove `rb_parser_string_deep_copy()` to make the whole thing simple
2024-03-28 04:26:42 +03:00
{
VALUE hash, script_lines;
ID script_lines_id;
if (NIL_P(path) || !lines) return;
[Universal parser] DeVALUE of p->debug_lines and ast->body.script_lines This patch is part of universal parser work. ## Summary - Decouple VALUE from members below: - `(struct parser_params *)->debug_lines` - `(rb_ast_t *)->body.script_lines` - Instead, they are now `rb_parser_ary_t *` - They can also be a `(VALUE)FIXNUM` as before to hold line count - `ISEQ_BODY(iseq)->variable.script_lines` remains VALUE - In order to do this, - Add `VALUE script_lines` param to `rb_iseq_new_with_opt()` - Introduce `rb_parser_build_script_lines_from()` to convert `rb_parser_ary_t *` into `VALUE` ## Other details - Extend `rb_parser_ary_t *`. It previously could only store `rb_parser_ast_token *`, now can store script_lines, too - Change tactics of building the top-level `SCRIPT_LINES__` in `yycompile0()` - Before: While parsing, each line of the script is added to `SCRIPT_LINES__[path]` - After: After `yyparse(p)`, `SCRIPT_LINES__[path]` will be built from `p->debug_lines` - Remove the second parameter of `rb_parser_set_script_lines()` to make it simple - Introduce `script_lines_free()` to be called from `rb_ast_free()` because the GC no longer takes care of the script_lines - Introduce `rb_parser_string_deep_copy()` in parse.y to maintain script_lines when `rb_ruby_parser_free()` called - With regard to this, please see *Future tasks* below ## Future tasks - Decouple IMEMO from `rb_ast_t *` - This lifts the five-members-restriction of Ruby object, - So we will be able to move the ownership of the `lex.string_buffer` from parser to AST - Then we remove `rb_parser_string_deep_copy()` to make the whole thing simple
2024-03-28 04:26:42 +03:00
CONST_ID(script_lines_id, "SCRIPT_LINES__");
if (!rb_const_defined_at(rb_cObject, script_lines_id)) return;
hash = rb_const_get_at(rb_cObject, script_lines_id);
if (!RB_TYPE_P(hash, T_HASH)) return;
if (rb_hash_lookup(hash, path) == Qnil) return;
script_lines = rb_parser_build_script_lines_from(lines);
rb_hash_aset(hash, path, script_lines);
Separate SCRIPT_LINES__ from ast.c This patch suggests relocating the code dealing with `SCRIPT_LINES__` from ast.c to ruby_parser.c. ## Background - I guess `AbstractSyntaxTree.of` method used to use `SCRIPT_LINES__` internally for some reason before - However, now it appears `SCRIPT_LINES__` is no longer used meaningfully by the method - As evidence of this, (and as my patch shows,) removing the function call of `rb_script_lines_for()` from `ast_s_of()` does not affect the result of `test/ruby/test_ast.rb` Given the above, I think two possibilities can be considered: - (A) `AbstractSyntaxTree.of` has not needed `SCRIPT_LINES__` already (I pick this) - (B) We lack a test case of `AbstractSyntaxTree.of` that needs to use `SCRIPT_LINES__` ## Besides, The current implementation causes strange behavior: ```console ruby -e"SCRIPT_LINES__ = {__FILE__ => []}; puts RubyVM::AbstractSyntaxTree.of(->{ 1 + 2 }, keep_script_lines: true).script_lines" => `-e:1:in '<main>': undefined method 'script_lines' for nil (NoMethodError)` ``` I think this is a bug because `AbstractSyntaxTree.of` is not supposed to return `nil` even in this case. This happens due to the ast.c's dependence on `SCRIPT_LINES__`. And at the end of the `ast_s_of()`, `node_find()` can not find the target child node obviously because it doesn't make sense to look for a corresponding node made from the parameter of `AbstractSyntaxTree.of` in the AST tree made from the value of `{__FILE__ => []}` ## Solution Since I think it's good enough `SCRIPT_LINES__` to be only referred by ruby.c, I chose the possibility "(A)" and wrote this patch which moves `rb_script_lines_for()` from ast.c to ruby_parser.c. So as the result: - `ast_s_of()` function no longer look up `SCRIPT_LINES__` - Even so, this patched code passes the existing tests - The strange behavior above no longer happens (I also added a test for it) Please correct me if I miss something🙏
2024-03-29 10:18:14 +03:00
}
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
VALUE
rb_ruby_ast_new(const NODE *const root)
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
{
rb_ast_t *ast = ruby_xcalloc(1, sizeof(rb_ast_t));
VALUE vast = ast_alloc(ast);
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
ast->body = (rb_ast_body_t){
.root = root,
.frozen_string_literal = -1,
.coverage_enabled = -1,
.script_lines = NULL,
.line_count = 0,
[Universal parser] Decouple IMEMO from rb_ast_t This patch removes the `VALUE flags` member from the `rb_ast_t` structure making `rb_ast_t` no longer an IMEMO object. ## Background We are trying to make the Ruby parser generated from parse.y a universal parser that can be used by other implementations such as mruby. To achieve this, it is necessary to exclude VALUE and IMEMO from parse.y, AST, and NODE. ## Summary (file by file) - `rubyparser.h` - Remove the `VALUE flags` member from `rb_ast_t` - `ruby_parser.c` and `internal/ruby_parser.h` - Use TypedData_Make_Struct VALUE which wraps `rb_ast_t` `in ast_alloc()` so that GC can manage it - You can retrieve `rb_ast_t` from the VALUE by `rb_ruby_ast_data_get()` - Change the return type of `rb_parser_compile_XXXX()` functions from `rb_ast_t *` to `VALUE` - rb_ruby_ast_new() which internally `calls ast_alloc()` is to create VALUE vast outside ruby_parser.c - `iseq.c` and `vm_core.h` - Amend the first parameter of `rb_iseq_new_XXXX()` functions from `rb_ast_body_t *` to `VALUE` - This keeps the VALUE of AST on the machine stack to prevent being removed by GC - `ast.c` - Almost all change is replacement `rb_ast_t *ast` with `VALUE vast` (sorry for the big diff) - Fix `node_memsize()` - Now it includes `rb_ast_local_table_link`, `tokens` and script_lines - `compile.c`, `load.c`, `node.c`, `parse.y`, `proc.c`, `ruby.c`, `template/prelude.c.tmpl`, `vm.c` and `vm_eval.c` - Follow-up due to the above changes - `imemo.{c|h}` - If an object with `imemo_ast` appears, considers it a bug Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2024-04-16 12:42:42 +03:00
};
return vast;
}
rb_ast_t *
rb_ruby_ast_data_get(VALUE vast)
{
rb_ast_t *ast;
if (NIL_P(vast)) return NULL;
TypedData_Get_Struct(vast, rb_ast_t, &ast_data_type, ast);
return ast;
}