[ruby/prism] Move DATA parsing into its own parse result field

https://github.com/ruby/prism/commit/42b60b6e95
This commit is contained in:
Kevin Newton 2023-11-27 14:17:02 -05:00 коммит произвёл git
Родитель 43dc8e9012
Коммит c798943a4a
9 изменённых файлов: 77 добавлений и 75 удалений

Просмотреть файл

@ -254,10 +254,10 @@ module Prism
loader = Serialize::Loader.new(source, buffer.read)
tokens = loader.load_tokens
node, comments, magic_comments, errors, warnings = loader.load_nodes
node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes
tokens.each { |token,| token.value.force_encoding(loader.encoding) }
ParseResult.new([node, tokens], comments, magic_comments, errors, warnings, source)
ParseResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source)
end
end

Просмотреть файл

@ -831,7 +831,7 @@ module Prism
# We sort by location to compare against Ripper's output
tokens.sort_by!(&:location)
ParseResult.new(tokens, result.comments, result.magic_comments, result.errors, result.warnings, [])
ParseResult.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, [])
end
end

Просмотреть файл

@ -238,11 +238,6 @@ module Prism
# Implement the hash pattern matching interface. The requested keys are
# ignored; the returned hash always contains just the location.
def deconstruct_keys(keys)
{ location: location }
end
# Always returns false. This can only be true for inline comments.
def trailing?
false
end
end
# InlineComment objects are the most common. They correspond to comments in
@ -263,21 +258,17 @@ module Prism
# An EmbDocComment is a comment that is surrounded by =begin and =end.
class EmbDocComment < Comment
  # Returns a string representation of this comment.
  def inspect
    "#<Prism::EmbDocComment @location=#{location.inspect}>"
  end

  # Always returns false, since this can only be true for inline comments.
  def trailing?
    false
  end
end
# DATAComment objects correspond to comments that are after the __END__
# keyword in a source file.
class DATAComment < Comment
# Returns a string representation of this comment, made up of the class
# name and the inspected location.
def inspect
"#<Prism::DATAComment @location=#{location.inspect}>"
end
end
# This represents a magic comment that was encountered during parsing.
class MagicComment
# A Location object representing the location of the key in the source.
@ -378,6 +369,11 @@ module Prism
# The list of magic comments that were encountered during parsing.
attr_reader :magic_comments
# An optional location that represents the location of the content after the
# __END__ marker. This content is loaded into the DATA constant when the
# file being parsed is the main file being executed.
attr_reader :data_loc
# The list of errors that were generated during parsing.
attr_reader :errors
@ -388,10 +384,11 @@ module Prism
attr_reader :source
# Create a new parse result object with the given values.
def initialize(value, comments, magic_comments, errors, warnings, source)
def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
@value = value
@comments = comments
@magic_comments = magic_comments
@data_loc = data_loc
@errors = errors
@warnings = warnings
@source = source
@ -399,7 +396,7 @@ module Prism
# Implement the hash pattern matching interface for ParseResult.
def deconstruct_keys(keys)
{ value: value, comments: comments, magic_comments: magic_comments, errors: errors, warnings: warnings }
{ value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
end
# Returns true if there were no errors during parsing and false if there

Просмотреть файл

@ -12,7 +12,6 @@ VALUE rb_cPrismLocation;
VALUE rb_cPrismComment;
VALUE rb_cPrismInlineComment;
VALUE rb_cPrismEmbDocComment;
VALUE rb_cPrismDATAComment;
VALUE rb_cPrismMagicComment;
VALUE rb_cPrismParseError;
VALUE rb_cPrismParseWarning;
@ -320,22 +319,7 @@ parser_comments(pm_parser_t *parser, VALUE source) {
LONG2FIX(comment->end - comment->start)
};
VALUE type;
switch (comment->type) {
case PM_COMMENT_INLINE:
type = rb_cPrismInlineComment;
break;
case PM_COMMENT_EMBDOC:
type = rb_cPrismEmbDocComment;
break;
case PM_COMMENT___END__:
type = rb_cPrismDATAComment;
break;
default:
type = rb_cPrismInlineComment;
break;
}
VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
}
@ -374,6 +358,25 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
return magic_comments;
}
/**
 * Build a Location instance for the parser's data location, or return nil
 * when the parser did not record one.
 */
static VALUE
parser_data_loc(const pm_parser_t *parser, VALUE source) {
    const pm_location_t *data_loc = &parser->data_loc;

    // A NULL end pointer is treated as "no data location present".
    if (data_loc->end == NULL) return Qnil;

    // Location takes the source, the start offset relative to the beginning
    // of the parsed input, and the length of the span.
    VALUE location_argv[] = {
        source,
        LONG2FIX(data_loc->start - parser->start),
        LONG2FIX(data_loc->end - data_loc->start)
    };

    return rb_class_new_instance(3, location_argv, rb_cPrismLocation);
}
/**
* Extract the errors out of the parser into an array.
*/
@ -531,6 +534,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
value,
parser_comments(&parser, source),
parser_magic_comments(&parser, source),
parser_data_loc(&parser, source),
parser_errors(&parser, parse_lex_data.encoding, source),
parser_warnings(&parser, parse_lex_data.encoding, source),
source
@ -538,7 +542,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
}
/**
@ -601,12 +605,13 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
pm_ast_new(&parser, node, encoding),
parser_comments(&parser, source),
parser_magic_comments(&parser, source),
parser_data_loc(&parser, source),
parser_errors(&parser, encoding, source),
parser_warnings(&parser, encoding, source),
source
};
VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@ -938,7 +943,6 @@ Init_prism(void) {
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);

Просмотреть файл

@ -361,8 +361,7 @@ typedef struct pm_context_node {
/** This is the type of a comment that we've found while parsing. */
typedef enum {
PM_COMMENT_INLINE,
PM_COMMENT_EMBDOC,
PM_COMMENT___END__
PM_COMMENT_EMBDOC
} pm_comment_type_t;
/**
@ -571,6 +570,9 @@ struct pm_parser {
/** The list of magic comments that have been found while parsing. */
pm_list_t magic_comment_list;
/** The optional location of the __END__ keyword and its contents. */
pm_location_t data_loc;
/** The list of warnings that have been found while parsing. */
pm_list_t warning_list;

Просмотреть файл

@ -9248,8 +9248,8 @@ parser_lex(pm_parser_t *parser) {
parser->current.type = PM_TOKEN___END__;
parser_lex_callback(parser);
pm_comment_t *comment = parser_comment(parser, PM_COMMENT___END__);
pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
parser->data_loc.start = parser->current.start;
parser->data_loc.end = parser->current.end;
LEX(PM_TOKEN_EOF);
}

Просмотреть файл

@ -95,9 +95,10 @@ module Prism
def load_metadata
comments = load_comments
magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) }
data_loc = load_optional_location
errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) }
warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) }
[comments, magic_comments, errors, warnings]
[comments, magic_comments, data_loc, errors, warnings]
end
def load_tokens
@ -117,11 +118,11 @@ module Prism
tokens = load_tokens
encoding = load_encoding
load_start_line
comments, magic_comments, errors, warnings = load_metadata
comments, magic_comments, data_loc, errors, warnings = load_metadata
tokens.each { |token,| token.value.force_encoding(encoding) }
raise "Expected to consume all bytes while deserializing" unless @io.eof?
Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
Prism::ParseResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)
end
def load_nodes
@ -129,17 +130,17 @@ module Prism
load_encoding
load_start_line
comments, magic_comments, errors, warnings = load_metadata
comments, magic_comments, data_loc, errors, warnings = load_metadata
@constant_pool_offset = io.read(4).unpack1("L")
@constant_pool = Array.new(load_varint, nil)
[load_node, comments, magic_comments, errors, warnings]
[load_node, comments, magic_comments, data_loc, errors, warnings]
end
def load_result
node, comments, magic_comments, errors, warnings = load_nodes
Prism::ParseResult.new(node, comments, magic_comments, errors, warnings, @source)
node, comments, magic_comments, data_loc, errors, warnings = load_nodes
Prism::ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)
end
private

Просмотреть файл

@ -15,7 +15,7 @@ pm_sizet_to_u32(size_t value) {
}
static void
pm_serialize_location(pm_parser_t *parser, pm_location_t *location, pm_buffer_t *buffer) {
pm_serialize_location(const pm_parser_t *parser, const pm_location_t *location, pm_buffer_t *buffer) {
assert(location->start);
assert(location->end);
assert(location->start <= location->end);
@ -170,6 +170,16 @@ pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_
}
}
/**
 * Serialize the parser's optional data location. A single 0 byte is written
 * when no location was recorded; otherwise a 1 byte is written, followed by
 * the serialized location.
 */
static void
pm_serialize_data_loc(const pm_parser_t *parser, pm_buffer_t *buffer) {
    const pm_location_t *data_loc = &parser->data_loc;

    // The leading byte marks whether a location follows in the buffer.
    if (data_loc->end == NULL) {
        pm_buffer_append_byte(buffer, 0);
        return;
    }

    pm_buffer_append_byte(buffer, 1);
    pm_serialize_location(parser, data_loc, buffer);
}
static void
pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
// serialize message
@ -214,6 +224,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
<%- end -%>
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
pm_serialize_data_loc(parser, buffer);
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
@ -310,6 +321,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
pm_buffer_append_varint(buffer, parser.start_line);
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
pm_serialize_data_loc(&parser, buffer);
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);

Просмотреть файл

@ -39,37 +39,23 @@ module Prism
)
end
def test_comment___END__
source = <<~RUBY
def test___END__
result = Prism.parse(<<~RUBY)
__END__
comment
RUBY
assert_comment(
source,
DATAComment,
start_offset: 0,
end_offset: 16,
start_line: 1,
end_line: 3,
start_column: 0,
end_column: 0
)
data_loc = result.data_loc
assert_equal 0, data_loc.start_offset
assert_equal 16, data_loc.end_offset
end
def test_comment___END__crlf
source = "__END__\r\ncomment\r\n"
def test___END__crlf
result = Prism.parse("__END__\r\ncomment\r\n")
assert_comment(
source,
DATAComment,
start_offset: 0,
end_offset: 18,
start_line: 1,
end_line: 3,
start_column: 0,
end_column: 0
)
data_loc = result.data_loc
assert_equal 0, data_loc.start_offset
assert_equal 18, data_loc.end_offset
end
def test_comment_embedded_document