зеркало из https://github.com/github/ruby.git
[ruby/prism] Support parsing streams
https://github.com/ruby/prism/commit/efdc2b7222
This commit is contained in:
Родитель
76e11595e2
Коммит
ec159fc8ba
|
@ -23,15 +23,21 @@ module Prism
|
|||
# size_t -> :size_t
|
||||
# void -> :void
|
||||
#
|
||||
def self.resolve_type(type)
|
||||
def self.resolve_type(type, callbacks)
|
||||
type = type.strip
|
||||
type.end_with?("*") ? :pointer : type.delete_prefix("const ").to_sym
|
||||
|
||||
if !type.end_with?("*")
|
||||
type.delete_prefix("const ").to_sym
|
||||
else
|
||||
type = type.delete_suffix("*").rstrip
|
||||
callbacks.include?(type.to_sym) ? type.to_sym : :pointer
|
||||
end
|
||||
end
|
||||
|
||||
# Read through the given header file and find the declaration of each of the
|
||||
# given functions. For each one, define a function with the same name and
|
||||
# signature as the C function.
|
||||
def self.load_exported_functions_from(header, *functions)
|
||||
def self.load_exported_functions_from(header, *functions, callbacks)
|
||||
File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
|
||||
# We only want to attempt to load exported functions.
|
||||
next unless line.start_with?("PRISM_EXPORTED_FUNCTION ")
|
||||
|
@ -55,24 +61,28 @@ module Prism
|
|||
|
||||
# Resolve the type of the argument by dropping the name of the argument
|
||||
# first if it is present.
|
||||
arg_types.map! { |type| resolve_type(type.sub(/\w+$/, "")) }
|
||||
arg_types.map! { |type| resolve_type(type.sub(/\w+$/, ""), callbacks) }
|
||||
|
||||
# Attach the function using the FFI library.
|
||||
attach_function name, arg_types, resolve_type(return_type)
|
||||
attach_function name, arg_types, resolve_type(return_type, [])
|
||||
end
|
||||
|
||||
# If we didn't find all of the functions, raise an error.
|
||||
raise "Could not find functions #{functions.inspect}" unless functions.empty?
|
||||
end
|
||||
|
||||
callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
|
||||
|
||||
load_exported_functions_from(
|
||||
"prism.h",
|
||||
"pm_version",
|
||||
"pm_serialize_parse",
|
||||
"pm_serialize_parse_stream",
|
||||
"pm_serialize_parse_comments",
|
||||
"pm_serialize_lex",
|
||||
"pm_serialize_parse_lex",
|
||||
"pm_parse_success_p"
|
||||
"pm_parse_success_p",
|
||||
[:pm_parse_stream_fgets_t]
|
||||
)
|
||||
|
||||
load_exported_functions_from(
|
||||
|
@ -81,7 +91,8 @@ module Prism
|
|||
"pm_buffer_init",
|
||||
"pm_buffer_value",
|
||||
"pm_buffer_length",
|
||||
"pm_buffer_free"
|
||||
"pm_buffer_free",
|
||||
[]
|
||||
)
|
||||
|
||||
load_exported_functions_from(
|
||||
|
@ -90,7 +101,8 @@ module Prism
|
|||
"pm_string_free",
|
||||
"pm_string_source",
|
||||
"pm_string_length",
|
||||
"pm_string_sizeof"
|
||||
"pm_string_sizeof",
|
||||
[]
|
||||
)
|
||||
|
||||
# This object represents a pm_buffer_t. We only use it as an opaque pointer,
|
||||
|
@ -215,13 +227,36 @@ module Prism
|
|||
end
|
||||
|
||||
# Mirror the Prism.parse_file API by using the serialization API. This uses
|
||||
# native strings instead of Ruby strings because it allows us to use mmap when
|
||||
# it is available.
|
||||
# native strings instead of Ruby strings because it allows us to use mmap
|
||||
# when it is available.
|
||||
def parse_file(filepath, **options)
  # Record the path in the options that are forwarded to parse_common.
  options[:filepath] = filepath

  LibRubyParser::PrismString.with_file(filepath) do |native|
    parse_common(native, native.read, options)
  end
end
|
||||
|
||||
# Mirror the Prism.parse_stream API by using the serialization API.
|
||||
def parse_stream(stream, **options)
  LibRubyParser::PrismBuffer.with do |buffer|
    # Everything handed to the parser is also accumulated here so that the
    # serialized result can be loaded against the same source text.
    source = +""

    # fgets-style callback. `string` is the native destination, `size` is
    # the capacity reported by the caller, and the third argument (the
    # opaque stream pointer) is ignored because the IO object is captured
    # by the closure. When the stream has nothing left the lambda evaluates
    # to nil.
    callback = -> (string, size, _) {
      # The message now states the condition that is actually enforced:
      # a size of zero is rejected as well.
      raise "Expected size to be > 0, got: #{size}" if size <= 0

      # One byte of the destination is reserved for the trailing NUL.
      line = stream.gets(size - 1)

      unless line.nil?
        source << line
        string.write_string("#{line}\x00", line.bytesize + 1)
      end
    }

    # In the pm_serialize_parse_stream function it accepts a pointer to the
    # IO object as a void* and then passes it through to the callback as the
    # third argument, but it never touches it itself. As such, since we have
    # access to the IO object already through the closure of the lambda, we
    # can pass a null pointer here and not worry.
    LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options))
    Prism.load(source, buffer.read)
  end
end
|
||||
|
||||
# Mirror the Prism.parse_comments API by using the serialization API.
|
||||
def parse_comments(code, **options)
|
||||
LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) }
|
||||
|
|
|
@ -504,6 +504,24 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
|||
return warnings;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new parse result from the given parser, value, encoding, and source.
|
||||
*/
|
||||
static VALUE
|
||||
parse_result_create(pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
|
||||
VALUE result_argv[] = {
|
||||
value,
|
||||
parser_comments(parser, source),
|
||||
parser_magic_comments(parser, source),
|
||||
parser_data_loc(parser, source),
|
||||
parser_errors(parser, encoding, source),
|
||||
parser_warnings(parser, encoding, source),
|
||||
source
|
||||
};
|
||||
|
||||
return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Lexing Ruby code */
|
||||
/******************************************************************************/
|
||||
|
@ -610,19 +628,11 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
|
|||
value = parse_lex_data.tokens;
|
||||
}
|
||||
|
||||
VALUE result_argv[] = {
|
||||
value,
|
||||
parser_comments(&parser, source),
|
||||
parser_magic_comments(&parser, source),
|
||||
parser_data_loc(&parser, source),
|
||||
parser_errors(&parser, parse_lex_data.encoding, source),
|
||||
parser_warnings(&parser, parse_lex_data.encoding, source),
|
||||
source
|
||||
};
|
||||
|
||||
VALUE result = parse_result_create(&parser, value, parse_lex_data.encoding, source);
|
||||
pm_node_destroy(&parser, node);
|
||||
pm_parser_free(&parser);
|
||||
return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -682,17 +692,8 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
|
|||
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
||||
|
||||
VALUE source = pm_source_new(&parser, encoding);
|
||||
VALUE result_argv[] = {
|
||||
pm_ast_new(&parser, node, encoding, source),
|
||||
parser_comments(&parser, source),
|
||||
parser_magic_comments(&parser, source),
|
||||
parser_data_loc(&parser, source),
|
||||
parser_errors(&parser, encoding, source),
|
||||
parser_warnings(&parser, encoding, source),
|
||||
source
|
||||
};
|
||||
|
||||
VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
|
||||
VALUE value = pm_ast_new(&parser, node, encoding, source);
|
||||
VALUE result = parse_result_create(&parser, value, encoding, source) ;
|
||||
|
||||
pm_node_destroy(&parser, node);
|
||||
pm_parser_free(&parser);
|
||||
|
@ -751,6 +752,60 @@ parse(int argc, VALUE *argv, VALUE self) {
|
|||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* An implementation of fgets that is suitable for use with Ruby IO objects.
|
||||
*/
|
||||
static char *
|
||||
parse_stream_fgets(char *restrict string, int size, void *restrict stream) {
|
||||
RUBY_ASSERT(size > 0);
|
||||
|
||||
VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
|
||||
if (NIL_P(line)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *cstr = StringValueCStr(line);
|
||||
size_t length = strlen(cstr);
|
||||
|
||||
memcpy(string, cstr, length);
|
||||
string[length] = '\0';
|
||||
|
||||
return string;
|
||||
}
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* Prism::parse_stream(stream, **options) -> ParseResult
|
||||
*
|
||||
* Parse the given object that responds to `gets` and return a ParseResult
|
||||
* instance. The options that are supported are the same as Prism::parse.
|
||||
*/
|
||||
static VALUE
|
||||
parse_stream(int argc, VALUE *argv, VALUE self) {
|
||||
VALUE stream;
|
||||
VALUE keywords;
|
||||
rb_scan_args(argc, argv, "1:", &stream, &keywords);
|
||||
|
||||
pm_options_t options = { 0 };
|
||||
extract_options(&options, Qnil, keywords);
|
||||
|
||||
pm_parser_t parser;
|
||||
pm_buffer_t buffer;
|
||||
|
||||
pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
|
||||
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
||||
|
||||
VALUE source = pm_source_new(&parser, encoding);
|
||||
VALUE value = pm_ast_new(&parser, node, encoding, source);
|
||||
VALUE result = parse_result_create(&parser, value, encoding, source);
|
||||
|
||||
pm_node_destroy(&parser, node);
|
||||
pm_buffer_free(&buffer);
|
||||
pm_parser_free(&parser);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* Prism::parse_file(filepath, **options) -> ParseResult
|
||||
|
@ -1271,6 +1326,7 @@ Init_prism(void) {
|
|||
rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
|
||||
rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
|
||||
rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
|
||||
rb_define_singleton_method(rb_cPrism, "parse_stream", parse_stream, -1);
|
||||
rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
|
||||
rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
|
||||
rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
|
||||
|
|
115
prism/prism.c
115
prism/prism.c
|
@ -18703,6 +18703,99 @@ pm_parse(pm_parser_t *parser) {
|
|||
return parse_program(parser);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read from the stream into the buffer until the fgets callback returns NULL. If the last read
|
||||
* line from the stream matches an __END__ marker, then halt and return false,
|
||||
* otherwise return true.
|
||||
*/
|
||||
static bool
|
||||
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets) {
|
||||
#define LINE_SIZE 4096
|
||||
char line[LINE_SIZE];
|
||||
|
||||
while (fgets(line, LINE_SIZE, stream) != NULL) {
|
||||
size_t length = strlen(line);
|
||||
|
||||
if (length == LINE_SIZE && line[length - 1] != '\n') {
|
||||
// If we read a line that is the maximum size and it doesn't end
|
||||
// with a newline, then we'll just append it to the buffer and
|
||||
// continue reading.
|
||||
pm_buffer_append_string(buffer, line, length);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Append the line to the buffer.
|
||||
pm_buffer_append_string(buffer, line, length);
|
||||
|
||||
// Check if the line matches the __END__ marker. If it does, then stop
|
||||
// reading and return false. In most circumstances, this means we should
|
||||
// stop reading from the stream so that the DATA constant can pick it
|
||||
// up.
|
||||
switch (length) {
|
||||
case 7:
|
||||
if (strncmp(line, "__END__", 7) == 0) return false;
|
||||
break;
|
||||
case 8:
|
||||
if (strncmp(line, "__END__\n", 8) == 0) return false;
|
||||
break;
|
||||
case 9:
|
||||
if (strncmp(line, "__END__\r\n", 9) == 0) return false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
#undef LINE_SIZE
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if there was an unterminated heredoc at the end of the input, which
|
||||
* would mean the stream isn't finished and we should keep reading.
|
||||
*
|
||||
* For the other lex modes we can check if the lex mode has been closed, but for
|
||||
* heredocs when we hit EOF we close the lex mode and then go back to parse the
|
||||
* rest of the line after the heredoc declaration so that we get more of the
|
||||
* syntax tree.
|
||||
*/
|
||||
static bool
|
||||
pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
|
||||
pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
|
||||
|
||||
for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
|
||||
if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a stream of Ruby source and return the tree.
|
||||
*
|
||||
* Prism is designed around having the entire source in memory at once, but you
|
||||
* can stream stdin in to Ruby so we need to support a streaming API.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION pm_node_t *
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options) {
    pm_buffer_init(buffer);

    // First read, followed by a first parse over whatever was read.
    bool exhausted = pm_parse_stream_read(buffer, stream, fgets);
    pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
    pm_node_t *node = pm_parse(parser);

    for (;;) {
        // Nothing more can be read from the stream.
        if (exhausted) break;

        // Only read further when the parse failed while a lex mode was still
        // open or a heredoc was left unterminated.
        bool unfinished = parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser));
        if (!unfinished) break;

        // Discard the previous attempt, read more input into the same
        // buffer, and parse the whole buffer again from the start.
        pm_node_destroy(parser, node);
        exhausted = pm_parse_stream_read(buffer, stream, fgets);

        pm_parser_free(parser);
        pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
        node = pm_parse(parser);
    }

    return node;
}
|
||||
|
||||
static inline void
|
||||
pm_serialize_header(pm_buffer_t *buffer) {
|
||||
pm_buffer_append_string(buffer, "PRISM", 5);
|
||||
|
@ -18745,6 +18838,28 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
|
|||
pm_options_free(&options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse and serialize the AST represented by the source that is read out of the
|
||||
* given stream into the given buffer.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION void
|
||||
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data) {
|
||||
pm_parser_t parser;
|
||||
pm_options_t options = { 0 };
|
||||
pm_options_read(&options, data);
|
||||
|
||||
pm_buffer_t parser_buffer;
|
||||
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, fgets, &options);
|
||||
pm_serialize_header(buffer);
|
||||
pm_serialize_content(&parser, node, buffer);
|
||||
pm_buffer_append_byte(buffer, '\0');
|
||||
|
||||
pm_node_destroy(&parser, node);
|
||||
pm_buffer_free(&parser_buffer);
|
||||
pm_parser_free(&parser);
|
||||
pm_options_free(&options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse and serialize the comments in the given source to the given buffer.
|
||||
*/
|
||||
|
|
|
@ -79,6 +79,36 @@ PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
|
|||
*/
|
||||
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
|
||||
|
||||
/**
|
||||
* This function is used in pm_parse_stream to retrieve a line of input from a
|
||||
* stream. It closely mirrors that of fgets so that fgets can be used as the
|
||||
* default implementation.
|
||||
*/
|
||||
typedef char * (pm_parse_stream_fgets_t)(char *restrict string, int size, void *restrict stream);
|
||||
|
||||
/**
|
||||
* Parse a stream of Ruby source and return the tree.
|
||||
*
|
||||
* @param parser The parser to use.
|
||||
* @param buffer The buffer to use.
|
||||
* @param stream The stream to parse.
|
||||
* @param fgets The function to use to read from the stream.
|
||||
* @param options The optional options to use when parsing.
|
||||
* @return The AST representing the source.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options);
|
||||
|
||||
/**
|
||||
* Parse and serialize the AST represented by the source that is read out of the
|
||||
* given stream into the given buffer.
|
||||
*
|
||||
* @param buffer The buffer to serialize to.
|
||||
* @param stream The stream to parse.
|
||||
* @param fgets The function to use to read from the stream.
|
||||
* @param data The optional data to pass to the parser.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data);
|
||||
|
||||
/**
|
||||
* Serialize the given list of comments to the given buffer.
|
||||
*
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require_relative "test_helper"
|
||||
require "stringio"
|
||||
|
||||
module Prism
  # Exercises Prism.parse_stream against in-memory StringIO streams.
  class ParseStreamTest < TestCase
    def test_single_line
      result = Prism.parse_stream(StringIO.new("1 + 2"))

      assert result.success?
      assert_kind_of Prism::CallNode, result.value.statements.body.first
    end

    def test_multi_line
      result = Prism.parse_stream(StringIO.new("1 + 2\n3 + 4"))

      assert result.success?
      assert_kind_of Prism::CallNode, result.value.statements.body.first
      assert_kind_of Prism::CallNode, result.value.statements.body.last
    end

    def test_multi_read
      # A single identifier spanning several 4096-byte chunks.
      result = Prism.parse_stream(StringIO.new("a" * 4096 * 4))

      assert result.success?
      assert_kind_of Prism::CallNode, result.value.statements.body.first
    end

    def test___END__
      stream = StringIO.new("1 + 2\n3 + 4\n__END__\n5 + 6")
      result = Prism.parse_stream(stream)

      assert result.success?
      assert_equal 2, result.value.statements.body.length

      # Whatever follows the marker must still be unread on the stream.
      assert_equal "5 + 6", stream.read
    end

    def test_false___END___in_string
      assert_statement_count(4, "1 + 2\n3 + 4\n\"\n__END__\n\"\n5 + 6")
    end

    def test_false___END___in_regexp
      assert_statement_count(4, "1 + 2\n3 + 4\n/\n__END__\n/\n5 + 6")
    end

    def test_false___END___in_list
      assert_statement_count(4, "1 + 2\n3 + 4\n%w[\n__END__\n]\n5 + 6")
    end

    def test_false___END___in_heredoc
      assert_statement_count(4, "1 + 2\n3 + 4\n<<-EOF\n__END__\nEOF\n5 + 6")
    end

    private

    # Parses +source+ through a StringIO and asserts that parsing succeeds
    # with exactly +count+ top-level statements.
    def assert_statement_count(count, source)
      result = Prism.parse_stream(StringIO.new(source))

      assert result.success?
      assert_equal count, result.value.statements.body.length
    end
  end
end
|
Загрузка…
Ссылка в новой задаче