From 53cc2723877f7794807684e31a530daca1a72ed6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 25 Mar 2024 09:27:27 -0400 Subject: [PATCH] [ruby/prism] Handle CRLF in regexp https://github.com/ruby/prism/commit/b96bada9ae --- prism/parser.h | 4 ++-- prism/prism.c | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/prism/parser.h b/prism/parser.h index b685fa377d..7e4bb99197 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -173,7 +173,7 @@ typedef struct pm_lex_mode { * This is the character set that should be used to delimit the * tokens within the regular expression. */ - uint8_t breakpoints[6]; + uint8_t breakpoints[7]; } regexp; struct { @@ -206,7 +206,7 @@ typedef struct pm_lex_mode { * This is the character set that should be used to delimit the * tokens within the string. */ - uint8_t breakpoints[6]; + uint8_t breakpoints[7]; } string; struct { diff --git a/prism/prism.c b/prism/prism.c index 6aa611624a..58c70dba69 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -308,14 +308,14 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato // regular expression. We'll use strpbrk to find the first of these // characters. uint8_t *breakpoints = lex_mode.as.regexp.breakpoints; - memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints)); + memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints)); // First we'll add the terminator. - breakpoints[3] = terminator; + breakpoints[4] = terminator; // Next, if there is an incrementor, then we'll check for that as well. 
if (incrementor != '\0') { - breakpoints[4] = incrementor; + breakpoints[5] = incrementor; } return lex_mode_push(parser, lex_mode); @@ -10835,6 +10835,19 @@ parser_lex(pm_parser_t *parser) { parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); break; + case '\r': + if (peek_at(parser, breakpoint + 1) != '\n') { + parser->current.end = breakpoint + 1; + breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); + break; + } + + parser->current.end = breakpoint + 1; + pm_regexp_token_buffer_escape(parser, &token_buffer); + breakpoint++; + token_buffer.base.cursor = breakpoint; + + /* fallthrough */ case '\n': // If we've hit a newline, then we need to track that in // the list of newlines.