diff --git a/ast.c b/ast.c index b7416ad203..3fec251bcc 100644 --- a/ast.c +++ b/ast.c @@ -64,8 +64,8 @@ ast_new_internal(rb_ast_t *ast, const NODE *node) return obj; } -static VALUE rb_ast_parse_str(VALUE str); -static VALUE rb_ast_parse_file(VALUE path); +static VALUE rb_ast_parse_str(VALUE str, VALUE save_script_lines); +static VALUE rb_ast_parse_file(VALUE path, VALUE save_script_lines); static VALUE ast_parse_new(void) @@ -85,29 +85,31 @@ ast_parse_done(rb_ast_t *ast) } static VALUE -ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str) +ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE save_script_lines) { - return rb_ast_parse_str(str); + return rb_ast_parse_str(str, save_script_lines); } static VALUE -rb_ast_parse_str(VALUE str) +rb_ast_parse_str(VALUE str, VALUE save_script_lines) { rb_ast_t *ast = 0; StringValue(str); - ast = rb_parser_compile_string_path(ast_parse_new(), Qnil, str, 1); + VALUE vparser = ast_parse_new(); + if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser); + ast = rb_parser_compile_string_path(vparser, Qnil, str, 1); return ast_parse_done(ast); } static VALUE -ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path) +ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path, VALUE save_script_lines) { - return rb_ast_parse_file(path); + return rb_ast_parse_file(path, save_script_lines); } static VALUE -rb_ast_parse_file(VALUE path) +rb_ast_parse_file(VALUE path, VALUE save_script_lines) { VALUE f; rb_ast_t *ast = 0; @@ -116,7 +118,9 @@ rb_ast_parse_file(VALUE path) FilePathValue(path); f = rb_file_open_str(path, "r"); rb_funcall(f, rb_intern("set_encoding"), 2, rb_enc_from_encoding(enc), rb_str_new_cstr("-")); - ast = rb_parser_compile_file_path(ast_parse_new(), Qnil, f, 1); + VALUE vparser = ast_parse_new(); + if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser); + ast = rb_parser_compile_file_path(vparser, Qnil, f, 1); rb_io_close(f); return ast_parse_done(ast); } @@ -135,12 +139,14 @@ lex_array(VALUE array, int index) } static VALUE -rb_ast_parse_array(VALUE array) +rb_ast_parse_array(VALUE array, VALUE save_script_lines) { rb_ast_t *ast = 0; array = rb_check_array_type(array); - ast = rb_parser_compile_generic(ast_parse_new(), lex_array, Qnil, array, 1); + VALUE vparser = ast_parse_new(); + if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser); + ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1); return ast_parse_done(ast); } @@ -187,7 +193,7 @@ script_lines(VALUE path) } static VALUE -ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body) +ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE save_script_lines) { VALUE path, node, lines; int node_id; @@ -209,13 +215,13 @@ ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body) path = rb_iseq_path(iseq); node_id = iseq->body->location.node_id; if (!NIL_P(lines = script_lines(path))) { - node = rb_ast_parse_array(lines); + node = rb_ast_parse_array(lines, save_script_lines); } else if (RSTRING_LEN(path) == 2 && memcmp(RSTRING_PTR(path), "-e", 2) == 0) { - node = rb_ast_parse_str(rb_e_script); + node = rb_ast_parse_str(rb_e_script, save_script_lines); } else { - node = rb_ast_parse_file(path); + node = rb_ast_parse_file(path, save_script_lines); } return node_find(node, node_id); @@ -698,6 +704,16 @@ ast_node_inspect(rb_execution_context_t *ec, VALUE self) return str; } +static VALUE +ast_node_script_lines(rb_execution_context_t *ec, VALUE self) +{ + struct ASTNodeData *data; + TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data); + VALUE ret = data->ast->body.script_lines; + if (!ret) ret = Qnil; + return ret; +} + #include "ast.rbinc" void diff --git a/ast.rb b/ast.rb index 9d4b05bdf1..ce99f53c45 100644 --- a/ast.rb +++ b/ast.rb @@ -29,8 +29,8 @@ module RubyVM::AbstractSyntaxTree # # RubyVM::AbstractSyntaxTree.parse("x = 1 + 2") # # => # - def self.parse string - Primitive.ast_s_parse string + def self.parse string, save_script_lines: false + Primitive.ast_s_parse string, save_script_lines end # call-seq: @@ -44,8 +44,8 @@ module RubyVM::AbstractSyntaxTree # # RubyVM::AbstractSyntaxTree.parse_file("my-app/app.rb") # # => # - def self.parse_file pathname - Primitive.ast_s_parse_file pathname + def self.parse_file pathname, save_script_lines: false + Primitive.ast_s_parse_file pathname, save_script_lines end # call-seq: @@ -63,8 +63,8 @@ module RubyVM::AbstractSyntaxTree # # RubyVM::AbstractSyntaxTree.of(method(:hello)) # # => # - def self.of body - Primitive.ast_s_of body + def self.of body, save_script_lines: false + Primitive.ast_s_of body, save_script_lines end # RubyVM::AbstractSyntaxTree::Node instances are created by parse methods in @@ -139,5 +139,41 @@ module RubyVM::AbstractSyntaxTree def inspect Primitive.ast_node_inspect end + + # call-seq: + # node.script_lines -> array + # + # Returns the original source code as an array of lines. + # + # Note that this is an API for ruby internal use, debugging, + # and research. Do not use this for any other purpose. + # The compatibility is not guaranteed. + def script_lines + Primitive.ast_node_script_lines + end + + # call-seq: + # node.source -> string + # + # Returns the code fragment that corresponds to this AST. + # + # Note that this is an API for ruby internal use, debugging, + # and research. Do not use this for any other purpose. + # The compatibility is not guaranteed. + # + # Also note that this API may return an incomplete code fragment + # that does not parse; for example, a here document following + # an expression may be dropped. + def source + lines = script_lines + if lines + lines = lines[first_lineno - 1 .. last_lineno - 1] + lines[-1] = lines[-1][0...last_column] + lines[0] = lines[0][first_column..-1] + lines.join + else + nil + end + end end end diff --git a/compile.c b/compile.c index c58a6c204a..30c75119b2 100644 --- a/compile.c +++ b/compile.c @@ -1329,6 +1329,7 @@ new_child_iseq(rb_iseq_t *iseq, const NODE *const node, ast.root = node; ast.compile_option = 0; ast.line_count = -1; + ast.script_lines = Qfalse; debugs("[new_child_iseq]> ---------------------------------------\n"); int isolated_depth = ISEQ_COMPILE_DATA(iseq)->isolated_depth; diff --git a/internal/parse.h b/internal/parse.h index a37a39f84d..588b2b34da 100644 --- a/internal/parse.h +++ b/internal/parse.h @@ -15,6 +15,7 @@ struct rb_iseq_struct; /* in vm_core.h */ /* parse.y */ VALUE rb_parser_set_yydebug(VALUE, VALUE); void *rb_parser_load_file(VALUE parser, VALUE name); +void rb_parser_save_script_lines(VALUE vparser); RUBY_SYMBOL_EXPORT_BEGIN VALUE rb_parser_set_context(VALUE, const struct rb_iseq_struct *, int); diff --git a/node.c b/node.c index bef9d7bcbd..f3dbf6e959 100644 --- a/node.c +++ b/node.c @@ -1407,6 +1407,7 @@ rb_ast_mark(rb_ast_t *ast) iterate_node_values(&nb->markable, mark_ast_value, NULL); } + if (ast->body.script_lines) rb_gc_mark(ast->body.script_lines); } void diff --git a/node.h b/node.h index 192e121fd7..592b285b83 100644 --- a/node.h +++ b/node.h @@ -399,6 +399,7 @@ typedef struct rb_ast_body_struct { const NODE *root; VALUE compile_option; int line_count; + VALUE script_lines; } rb_ast_body_t; typedef struct rb_ast_struct { VALUE flags; diff --git a/parse.y b/parse.y index 6b42b6b31b..47b63e810d 100644 --- a/parse.y +++ b/parse.y @@ -337,6 +337,7 @@ struct parser_params { unsigned int do_loop: 1; unsigned int do_chomp: 1; unsigned int do_split: 1; + unsigned int save_script_lines: 1; NODE *eval_tree_begin; NODE *eval_tree; @@ -6241,6 +6242,13 @@ yycompile0(VALUE arg) cov = Qtrue; } } + if (p->save_script_lines) { + if (!p->debug_lines) { + p->debug_lines = rb_ary_new(); + } + + RB_OBJ_WRITE(p->ast, &p->ast->body.script_lines, p->debug_lines); + } parser_prepare(p); #define RUBY_DTRACE_PARSE_HOOK(name) \ @@ -13186,6 +13194,15 @@ rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main p->parent_iseq = base; return vparser; } + +void +rb_parser_save_script_lines(VALUE vparser) +{ + struct parser_params *p; + + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); + p->save_script_lines = 1; +} #endif #ifdef RIPPER diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb index b039911f3a..5a229eabd4 100644 --- a/test/ruby/test_ast.rb +++ b/test/ruby/test_ast.rb @@ -372,4 +372,54 @@ class TestAst < Test::Unit::TestCase _, args = *node.children.last.children[1].children assert_equal(:a, args.children[rest]) end + + def test_save_script_lines_for_parse + node = RubyVM::AbstractSyntaxTree.parse(<<~END, save_script_lines: true) +1.times do + 2.times do + end +end +__END__ +dummy + END + + expected = [ + "1.times do\n", + " 2.times do\n", + " end\n", + "end\n", + "__END__\n", + ] + assert_equal(expected, node.script_lines) + + expected = + "1.times do\n" + + " 2.times do\n" + + " end\n" + + "end" + assert_equal(expected, node.source) + + expected = + "do\n" + + " 2.times do\n" + + " end\n" + + "end" + assert_equal(expected, node.children.last.children.last.source) + + expected = + "2.times do\n" + + " end" + assert_equal(expected, node.children.last.children.last.children.last.source) + end + + def test_save_script_lines_for_of + proc = Proc.new { 1 + 2 } + method = self.method(__method__) + + node_proc = RubyVM::AbstractSyntaxTree.of(proc, save_script_lines: true) + node_method = RubyVM::AbstractSyntaxTree.of(method, save_script_lines: true) + + assert_equal("{ 1 + 2 }", node_proc.source) + assert_equal("def test_save_script_lines_for_of\n", node_method.source.lines.first) + end end