From a6dac9bb4f05530aec8c3f117d2cbe38bade99f2 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 11 Mar 2024 11:05:31 -0400
Subject: [PATCH] [PRISM] Parse stdin on CLI with prism

---
 prism_compile.c | 89 ++++++++++++++++++++++++++++++++++++++-----------
 prism_compile.h |  1 +
 ruby.c          |  6 +++-
 3 files changed, 75 insertions(+), 21 deletions(-)

diff --git a/prism_compile.c b/prism_compile.c
index 8b65ae7a7a..fcc9eddfbb 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -8100,7 +8100,7 @@ pm_parse_result_free(pm_parse_result_t *result)
  * as well.
  */
 static bool
-pm_parse_input_error_utf8_p(const pm_parser_t *parser, const pm_location_t *location)
+pm_parse_process_error_utf8_p(const pm_parser_t *parser, const pm_location_t *location)
 {
     const size_t start_line = pm_newline_list_line_column(&parser->newline_list, location->start, 1).line;
     const size_t end_line = pm_newline_list_line_column(&parser->newline_list, location->end, 1).line;
@@ -8122,7 +8122,7 @@ pm_parse_input_error_utf8_p(const pm_parser_t *parser, const pm_location_t *loca
  * information as possible about the errors that were encountered.
  */
 static VALUE
-pm_parse_input_error(const pm_parse_result_t *result)
+pm_parse_process_error(const pm_parse_result_t *result)
 {
     const pm_diagnostic_t *head = (const pm_diagnostic_t *) result->parser.error_list.head;
     bool valid_utf8 = true;
@@ -8140,7 +8140,7 @@ pm_parse_input_error(const pm_parse_result_t *result)
         // contain invalid byte sequences. So if any source examples include
         // invalid UTF-8 byte sequences, we will skip showing source examples
         // entirely.
-        if (valid_utf8 && !pm_parse_input_error_utf8_p(&result->parser, &error->location)) {
+        if (valid_utf8 && !pm_parse_process_error_utf8_p(&result->parser, &error->location)) {
             valid_utf8 = false;
         }
     }
@@ -8172,19 +8172,18 @@ pm_parse_input_error(const pm_parse_result_t *result)
  * result object is zeroed out.
  */
 static VALUE
-pm_parse_input(pm_parse_result_t *result, VALUE filepath)
+pm_parse_process(pm_parse_result_t *result, pm_node_t *node)
 {
-    // Set up the parser and parse the input.
-    pm_options_filepath_set(&result->options, RSTRING_PTR(filepath));
-    RB_GC_GUARD(filepath);
-
     pm_parser_t *parser = &result->parser;
-    pm_parser_init(parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options);
-    const pm_node_t *node = pm_parse(parser);
+
+    // First, set up the scope node so that the AST node is attached and can be
+    // freed regardless of whether or we return an error.
+    pm_scope_node_t *scope_node = &result->node;
+    pm_scope_node_init(node, scope_node, NULL);
 
     // If there are errors, raise an appropriate error and free the result.
-    if (result->parser.error_list.size > 0) {
-        VALUE error = pm_parse_input_error(result);
+    if (parser->error_list.size > 0) {
+        VALUE error = pm_parse_process_error(result);
 
         // TODO: We need to set the backtrace.
         // rb_funcallv(error, rb_intern("set_backtrace"), 1, &path);
@@ -8208,9 +8207,6 @@ pm_parse_input(pm_parse_result_t *result, VALUE filepath)
 
     // Now set up the constant pool and intern all of the various constants into
     // their corresponding IDs.
-    pm_scope_node_t *scope_node = &result->node;
-    pm_scope_node_init(node, scope_node, NULL);
-
     scope_node->encoding = rb_enc_find(parser->encoding->name);
     if (!scope_node->encoding) rb_bug("Encoding not found %s!", parser->encoding->name);
 
@@ -8300,7 +8296,13 @@ pm_load_file(pm_parse_result_t *result, VALUE filepath)
 VALUE
 pm_parse_file(pm_parse_result_t *result, VALUE filepath)
 {
-    VALUE error = pm_parse_input(result, filepath);
+    pm_options_filepath_set(&result->options, RSTRING_PTR(filepath));
+    RB_GC_GUARD(filepath);
+
+    pm_parser_init(&result->parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options);
+    pm_node_t *node = pm_parse(&result->parser);
+
+    VALUE error = pm_parse_process(result, node);
 
     // If we're parsing a filepath, then we need to potentially support the
     // SCRIPT_LINES__ constant, which can be a hash that has an array of lines
@@ -8335,9 +8337,9 @@ pm_load_parse_file(pm_parse_result_t *result, VALUE filepath)
 
 /**
  * Parse the given source that corresponds to the given filepath and store the
- * resulting scope node in the given parse result struct. This function could
- * potentially raise a Ruby error. It is assumed that the parse result object is
- * zeroed out.
+ * resulting scope node in the given parse result struct. It is assumed that the
+ * parse result object is zeroed out. If the string fails to parse, then a Ruby
+ * error is returned.
  */
 VALUE
 pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath)
@@ -8347,7 +8349,54 @@ pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath)
     rb_encoding *encoding = rb_enc_get(source);
     pm_options_encoding_set(&result->options, rb_enc_name(encoding));
 
-    return pm_parse_input(result, filepath);
+    pm_options_filepath_set(&result->options, RSTRING_PTR(filepath));
+    RB_GC_GUARD(filepath);
+
+    pm_parser_init(&result->parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options);
+    pm_node_t *node = pm_parse(&result->parser);
+
+    return pm_parse_process(result, node);
+}
+
+/**
+ * An implementation of fgets that is suitable for use with Ruby IO objects.
+ */
+static char *
+pm_parse_stdin_fgets(char *string, int size, void *stream)
+{
+    RUBY_ASSERT(size > 0);
+
+    VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
+    if (NIL_P(line)) {
+        return NULL;
+    }
+
+    const char *cstr = StringValueCStr(line);
+    size_t length = strlen(cstr);
+
+    memcpy(string, cstr, length);
+    string[length] = '\0';
+
+    return string;
+}
+
+/**
+ * Parse the source off STDIN and store the resulting scope node in the given
+ * parse result struct. It is assumed that the parse result object is zeroed
+ * out. If the stream fails to parse, then a Ruby error is returned.
+ */
+VALUE
+pm_parse_stdin(pm_parse_result_t *result)
+{
+    pm_buffer_t buffer;
+    pm_node_t *node = pm_parse_stream(&result->parser, &buffer, (void *) rb_stdin, pm_parse_stdin_fgets, &result->options);
+
+    // Copy the allocated buffer contents into the input string so that it gets
+    // freed. At this point we've handed over ownership, so we don't need to
+    // free the buffer itself.
+    pm_string_owned_init(&result->input, (uint8_t *) pm_buffer_value(&buffer), pm_buffer_length(&buffer));
+
+    return pm_parse_process(result, node);
 }
 
 #undef NEW_ISEQ
diff --git a/prism_compile.h b/prism_compile.h
index d170e1b729..427fa54b51 100644
--- a/prism_compile.h
+++ b/prism_compile.h
@@ -51,6 +51,7 @@ VALUE pm_load_file(pm_parse_result_t *result, VALUE filepath);
 VALUE pm_parse_file(pm_parse_result_t *result, VALUE filepath);
 VALUE pm_load_parse_file(pm_parse_result_t *result, VALUE filepath);
 VALUE pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath);
+VALUE pm_parse_stdin(pm_parse_result_t *result);
 void pm_parse_result_free(pm_parse_result_t *result);
 
 rb_iseq_t *pm_iseq_new(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent, enum rb_iseq_type);
diff --git a/ruby.c b/ruby.c
index d2642c96c5..bcc4c350f2 100644
--- a/ruby.c
+++ b/ruby.c
@@ -2122,7 +2122,11 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result)
 
     VALUE error;
     if (strcmp(opt->script, "-") == 0) {
-        rb_raise(rb_eRuntimeError, "Prism support for streaming code from stdin is not currently supported");
+        pm_options_command_line_set(options, command_line);
+        pm_options_filepath_set(options, "-");
+
+        prism_opt_init(opt);
+        error = pm_parse_stdin(result);
     }
     else if (opt->e_script) {
         command_line |= PM_OPTIONS_COMMAND_LINE_E;