From 53cc2723877f7794807684e31a530daca1a72ed6 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 25 Mar 2024 09:27:27 -0400
Subject: [PATCH] [ruby/prism] Handle CRLF in regexp

https://github.com/ruby/prism/commit/b96bada9ae
---
 prism/parser.h |  4 ++--
 prism/prism.c  | 19 ++++++++++++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/prism/parser.h b/prism/parser.h
index b685fa377d..7e4bb99197 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -173,7 +173,7 @@ typedef struct pm_lex_mode {
              * This is the character set that should be used to delimit the
              * tokens within the regular expression.
              */
-            uint8_t breakpoints[6];
+            uint8_t breakpoints[7];
         } regexp;
 
         struct {
@@ -206,7 +206,7 @@ typedef struct pm_lex_mode {
              * This is the character set that should be used to delimit the
              * tokens within the string.
              */
-            uint8_t breakpoints[6];
+            uint8_t breakpoints[7];
         } string;
 
         struct {
diff --git a/prism/prism.c b/prism/prism.c
index 6aa611624a..58c70dba69 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -308,14 +308,14 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
     // regular expression. We'll use strpbrk to find the first of these
     // characters.
     uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
-    memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
+    memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
 
     // First we'll add the terminator.
-    breakpoints[3] = terminator;
+    breakpoints[4] = terminator;
 
     // Next, if there is an incrementor, then we'll check for that as well.
     if (incrementor != '\0') {
-        breakpoints[4] = incrementor;
+        breakpoints[5] = incrementor;
     }
 
     return lex_mode_push(parser, lex_mode);
@@ -10835,6 +10835,19 @@ parser_lex(pm_parser_t *parser) {
                         parser->current.end = breakpoint + 1;
                         breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
                         break;
+                    case '\r':
+                        if (peek_at(parser, breakpoint + 1) != '\n') {
+                            parser->current.end = breakpoint + 1;
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+                            break;
+                        }
+
+                        parser->current.end = breakpoint + 1;
+                        pm_regexp_token_buffer_escape(parser, &token_buffer);
+                        breakpoint++;
+                        token_buffer.base.cursor = breakpoint;
+
+                        /* fallthrough */
                     case '\n':
                         // If we've hit a newline, then we need to track that in
                         // the list of newlines.