From f60b974393d76126bf6fc4566f7bf4c754c3ca70 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 11 Nov 2023 23:15:58 -0500 Subject: [PATCH] [ruby/prism] Add cp855 encoding https://github.com/ruby/prism/commit/8069d143f5 --- prism/enc/pm_encoding.h | 3 +- prism/enc/pm_tables.c | 35 ++++++++++++++++ prism/prism.c | 1 + test/prism/encoding_test.rb | 79 ++++++++++++++++++------------------- test/prism/newline_test.rb | 2 +- 5 files changed, 78 insertions(+), 42 deletions(-) diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index 7f5785ea8b..0d7a414383 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -158,9 +158,10 @@ extern const uint8_t pm_encoding_unicode_table[256]; extern pm_encoding_t pm_encoding_ascii; extern pm_encoding_t pm_encoding_ascii_8bit; extern pm_encoding_t pm_encoding_big5; +extern pm_encoding_t pm_encoding_cp51932; extern pm_encoding_t pm_encoding_cp850; extern pm_encoding_t pm_encoding_cp852; -extern pm_encoding_t pm_encoding_cp51932; +extern pm_encoding_t pm_encoding_cp855; extern pm_encoding_t pm_encoding_euc_jp; extern pm_encoding_t pm_encoding_gbk; extern pm_encoding_t pm_encoding_iso_8859_1; diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c index 685bd838e7..9bb56e81ce 100644 --- a/prism/enc/pm_tables.c +++ b/prism/enc/pm_tables.c @@ -72,6 +72,30 @@ static uint8_t pm_encoding_cp852_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx }; +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding CP855 character. 
+ */ +static uint8_t pm_encoding_cp855_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x + 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + /** * Each element of the following table contains a bitfield that indicates a * piece of information about the corresponding ISO-8859-1 character. 
@@ -739,6 +763,7 @@ pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t PRISM_ENCODING_TABLE(cp850) PRISM_ENCODING_TABLE(cp852) +PRISM_ENCODING_TABLE(cp855) PRISM_ENCODING_TABLE(iso_8859_1) PRISM_ENCODING_TABLE(iso_8859_2) PRISM_ENCODING_TABLE(iso_8859_3) @@ -807,6 +832,16 @@ pm_encoding_t pm_encoding_cp852 = { .multibyte = false }; +/** CP855 */ +pm_encoding_t pm_encoding_cp855 = { + .name = "CP855", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_cp855_alnum_char, + .alpha_char = pm_encoding_cp855_alpha_char, + .isupper_char = pm_encoding_cp855_isupper_char, + .multibyte = false +}; + /** ISO-8859-1 */ pm_encoding_t pm_encoding_iso_8859_1 = { .name = "ISO-8859-1", diff --git a/prism/prism.c b/prism/prism.c index 12ce4ecaee..e9cbe08a68 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6070,6 +6070,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("Big5", pm_encoding_big5); ENCODING2("CP850", "IBM850", pm_encoding_cp850); ENCODING1("CP852", pm_encoding_cp852); + ENCODING2("CP855", "IBM855", pm_encoding_cp855); ENCODING2("GBK", "CP936", pm_encoding_gbk); ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1); ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2); diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 9f209691d0..bd0911e96e 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -5,46 +5,45 @@ require_relative "test_helper" module Prism class EncodingTest < TestCase [ - "US-ASCII", - "ASCII-8BIT", - "Big5", - "CP850", - "CP852", - "CP51932", - "EUC-JP", - "GBK", - "ISO-8859-1", - "ISO-8859-2", - "ISO-8859-3", - "ISO-8859-4", - "ISO-8859-5", - "ISO-8859-6", - "ISO-8859-7", - "ISO-8859-8", - "ISO-8859-9", - "ISO-8859-10", - "ISO-8859-11", - "ISO-8859-13", - "ISO-8859-14", - "ISO-8859-15", - "ISO-8859-16", - "KOI8-R", - "Shift_JIS", - "UTF-8", - "UTF8-MAC", - "Windows-1250", - "Windows-1251", - 
"Windows-1252", - "Windows-1253", - "Windows-1254", - "Windows-1255", - "Windows-1256", - "Windows-1257", - "Windows-1258", - "Windows-31J" - ].each do |canonical_name| - encoding = Encoding.find(canonical_name) - + Encoding::ASCII, + Encoding::ASCII_8BIT, + Encoding::Big5, + Encoding::CP51932, + Encoding::CP850, + Encoding::CP852, + Encoding::CP855, + Encoding::EUC_JP, + Encoding::GBK, + Encoding::ISO_8859_1, + Encoding::ISO_8859_2, + Encoding::ISO_8859_3, + Encoding::ISO_8859_4, + Encoding::ISO_8859_5, + Encoding::ISO_8859_6, + Encoding::ISO_8859_7, + Encoding::ISO_8859_8, + Encoding::ISO_8859_9, + Encoding::ISO_8859_10, + Encoding::ISO_8859_11, + Encoding::ISO_8859_13, + Encoding::ISO_8859_14, + Encoding::ISO_8859_15, + Encoding::ISO_8859_16, + Encoding::KOI8_R, + Encoding::Shift_JIS, + Encoding::UTF_8, + Encoding::UTF8_MAC, + Encoding::Windows_1250, + Encoding::Windows_1251, + Encoding::Windows_1252, + Encoding::Windows_1253, + Encoding::Windows_1254, + Encoding::Windows_1255, + Encoding::Windows_1256, + Encoding::Windows_1257, + Encoding::Windows_1258, + Encoding::Windows_31J + ].each do |encoding| encoding.names.each do |name| # Even though UTF-8-MAC is an alias for UTF8-MAC, CRuby treats it as # UTF-8. So we'll skip this test. diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb index c20a99a398..d998502869 100644 --- a/test/prism/newline_test.rb +++ b/test/prism/newline_test.rb @@ -7,7 +7,7 @@ return unless defined?(RubyVM::InstructionSequence) module Prism class NewlineTest < TestCase base = File.expand_path("../", __FILE__) - filepaths = Dir["*.rb", base: base] - %w[unescape_test.rb] + filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb unescape_test.rb] filepaths.each do |relative| define_method("test_newline_flags_#{relative}") do