merge revision(s) a8ba1ddd78544b4bda749051d44f7b2a8a0ec5ff: [Backport #19455]

Use UTF-8 encoding for literal extended regexps with UTF-8 characters in comments

Fixes [Bug #19455]
---
 re.c                     | 9 ++++++++-
 test/ruby/test_regexp.rb | 7 +++++++
 2 files changed, 15 insertions(+), 1 deletion(-)
2023-07-17 17:23:31 +09:00 · 2023-07-17 17:23:31 +09:00 · be09d77b96
--- a/re.c
+++ b/re.c
@@ -2926,7 +2926,11 @@ escape_asis:
          case '#':
            if (extended_mode && !in_char_class) {
                /* consume and ignore comment in extended regexp */
-                while ((p < end) && ((c = *p++) != '\n'));
+                while ((p < end) && ((c = *p++) != '\n')) {
+                    if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
+                        *encp = enc;
+                    }
+                }
                break;
            }
            rb_str_buf_cat(buf, (char *)&c, 1);
@@ -2961,6 +2965,9 @@ escape_asis:
                        switch (c = *p++) {
                          default:
                            if (!(c & 0x80)) break;
+                            if (!*encp && enc == rb_utf8_encoding()) {
+                                *encp = enc;
+                            }
                            --p;
                            /* fallthrough */
                          case '\\':
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -200,6 +200,13 @@ class TestRegexp < Test::Unit::TestCase
    RUBY
  end

+  def test_utf8_comment_in_usascii_extended_regexp_bug_19455
+    assert_separately([], <<-RUBY)
+      assert_equal(Encoding::UTF_8, /(?#\u1000)/x.encoding)
+      assert_equal(Encoding::UTF_8, /#\u1000/x.encoding)
+    RUBY
+  end
+
  def test_union
    assert_equal :ok, begin
      Regexp.union(
--- a/version.h
+++ b/version.h
@@ -11,7 +11,7 @@
 # define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
 #define RUBY_VERSION_TEENY 2
 #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 90
+#define RUBY_PATCHLEVEL 91

 #include "ruby/version.h"
 #include "ruby/internal/abi.h"