[ruby/prism] Add cp855 encoding

https://github.com/ruby/prism/commit/8069d143f5
This commit is contained in:
Kevin Newton 2023-11-11 23:15:58 -05:00 коммит произвёл git
Родитель 5d7e6842b5
Коммит f60b974393
5 изменённых файлов: 78 добавлений и 42 удалений

Просмотреть файл

@ -158,9 +158,10 @@ extern const uint8_t pm_encoding_unicode_table[256];
extern pm_encoding_t pm_encoding_ascii; extern pm_encoding_t pm_encoding_ascii;
extern pm_encoding_t pm_encoding_ascii_8bit; extern pm_encoding_t pm_encoding_ascii_8bit;
extern pm_encoding_t pm_encoding_big5; extern pm_encoding_t pm_encoding_big5;
extern pm_encoding_t pm_encoding_cp51932;
extern pm_encoding_t pm_encoding_cp850; extern pm_encoding_t pm_encoding_cp850;
extern pm_encoding_t pm_encoding_cp852; extern pm_encoding_t pm_encoding_cp852;
extern pm_encoding_t pm_encoding_cp51932; extern pm_encoding_t pm_encoding_cp855;
extern pm_encoding_t pm_encoding_euc_jp; extern pm_encoding_t pm_encoding_euc_jp;
extern pm_encoding_t pm_encoding_gbk; extern pm_encoding_t pm_encoding_gbk;
extern pm_encoding_t pm_encoding_iso_8859_1; extern pm_encoding_t pm_encoding_iso_8859_1;

Просмотреть файл

@ -72,6 +72,30 @@ static uint8_t pm_encoding_cp852_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
}; };
/**
* Each element of the following table contains a bitfield that indicates a
* piece of information about the corresponding CP855 character. The table is
* indexed by the raw byte value (row = high nibble, column = low nibble).
*
* Values used in this table:
*   2 - bytes 0x30-0x39 (ASCII digits)
*   7 - bytes 0x41-0x5A (ASCII uppercase letters)
*   3 - bytes 0x61-0x7A (ASCII lowercase letters)
*   0 - every other byte, including the whole 0x80-0xFF range
*
* NOTE(review): presumably bit 0 (1) marks alphabetic, bit 1 (2) marks
* alphanumeric and bit 2 (4) marks uppercase; confirm against the flag
* definitions used by PRISM_ENCODING_TABLE, which are not visible here.
*/
static uint8_t pm_encoding_cp855_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
};
/** /**
* Each element of the following table contains a bitfield that indicates a * Each element of the following table contains a bitfield that indicates a
* piece of information about the corresponding ISO-8859-1 character. * piece of information about the corresponding ISO-8859-1 character.
@ -739,6 +763,7 @@ pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
PRISM_ENCODING_TABLE(cp850) PRISM_ENCODING_TABLE(cp850)
PRISM_ENCODING_TABLE(cp852) PRISM_ENCODING_TABLE(cp852)
PRISM_ENCODING_TABLE(cp855)
PRISM_ENCODING_TABLE(iso_8859_1) PRISM_ENCODING_TABLE(iso_8859_1)
PRISM_ENCODING_TABLE(iso_8859_2) PRISM_ENCODING_TABLE(iso_8859_2)
PRISM_ENCODING_TABLE(iso_8859_3) PRISM_ENCODING_TABLE(iso_8859_3)
@ -807,6 +832,16 @@ pm_encoding_t pm_encoding_cp852 = {
.multibyte = false .multibyte = false
}; };
/**
* CP855
*
* Encoding descriptor for CP855. It is declared as a non-multibyte encoding
* and uses the shared single-byte width callback.
*
* NOTE(review): the pm_encoding_cp855_* callbacks are presumably generated by
* PRISM_ENCODING_TABLE(cp855) from pm_encoding_cp855_table; confirm against
* the macro definition, which is not visible here.
*/
pm_encoding_t pm_encoding_cp855 = {
.name = "CP855",
.char_width = pm_encoding_single_char_width,
.alnum_char = pm_encoding_cp855_alnum_char,
.alpha_char = pm_encoding_cp855_alpha_char,
.isupper_char = pm_encoding_cp855_isupper_char,
.multibyte = false
};
/** ISO-8859-1 */ /** ISO-8859-1 */
pm_encoding_t pm_encoding_iso_8859_1 = { pm_encoding_t pm_encoding_iso_8859_1 = {
.name = "ISO-8859-1", .name = "ISO-8859-1",

Просмотреть файл

@ -6070,6 +6070,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
ENCODING1("Big5", pm_encoding_big5); ENCODING1("Big5", pm_encoding_big5);
ENCODING2("CP850", "IBM850", pm_encoding_cp850); ENCODING2("CP850", "IBM850", pm_encoding_cp850);
ENCODING1("CP852", pm_encoding_cp852); ENCODING1("CP852", pm_encoding_cp852);
ENCODING1("CP855", pm_encoding_cp855);
ENCODING2("GBK", "CP936", pm_encoding_gbk); ENCODING2("GBK", "CP936", pm_encoding_gbk);
ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1); ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1);
ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2); ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2);

Просмотреть файл

@ -5,46 +5,45 @@ require_relative "test_helper"
module Prism module Prism
class EncodingTest < TestCase class EncodingTest < TestCase
[ [
"US-ASCII", Encoding::ASCII,
"ASCII-8BIT", Encoding::ASCII_8BIT,
"Big5", Encoding::Big5,
"CP850", Encoding::CP51932,
"CP852", Encoding::CP850,
"CP51932", Encoding::CP852,
"EUC-JP", Encoding::CP855,
"GBK", Encoding::EUC_JP,
"ISO-8859-1", Encoding::GBK,
"ISO-8859-2", Encoding::ISO_8859_1,
"ISO-8859-3", Encoding::ISO_8859_2,
"ISO-8859-4", Encoding::ISO_8859_3,
"ISO-8859-5", Encoding::ISO_8859_4,
"ISO-8859-6", Encoding::ISO_8859_5,
"ISO-8859-7", Encoding::ISO_8859_6,
"ISO-8859-8", Encoding::ISO_8859_7,
"ISO-8859-9", Encoding::ISO_8859_8,
"ISO-8859-10", Encoding::ISO_8859_9,
"ISO-8859-11", Encoding::ISO_8859_10,
"ISO-8859-13", Encoding::ISO_8859_11,
"ISO-8859-14", Encoding::ISO_8859_13,
"ISO-8859-15", Encoding::ISO_8859_14,
"ISO-8859-16", Encoding::ISO_8859_15,
"KOI8-R", Encoding::ISO_8859_16,
"Shift_JIS", Encoding::KOI8_R,
"UTF-8", Encoding::Shift_JIS,
"UTF8-MAC", Encoding::UTF_8,
"Windows-1250", Encoding::UTF8_MAC,
"Windows-1251", Encoding::Windows_1250,
"Windows-1252", Encoding::Windows_1251,
"Windows-1253", Encoding::Windows_1252,
"Windows-1254", Encoding::Windows_1253,
"Windows-1255", Encoding::Windows_1254,
"Windows-1256", Encoding::Windows_1255,
"Windows-1257", Encoding::Windows_1256,
"Windows-1258", Encoding::Windows_1257,
"Windows-31J" Encoding::Windows_1258,
].each do |canonical_name| Encoding::Windows_31J
encoding = Encoding.find(canonical_name) ].each do |encoding|
encoding.names.each do |name| encoding.names.each do |name|
# Even though UTF-8-MAC is an alias for UTF8-MAC, CRuby treats it as # Even though UTF-8-MAC is an alias for UTF8-MAC, CRuby treats it as
# UTF-8. So we'll skip this test. # UTF-8. So we'll skip this test.

Просмотреть файл

@ -7,7 +7,7 @@ return unless defined?(RubyVM::InstructionSequence)
module Prism module Prism
class NewlineTest < TestCase class NewlineTest < TestCase
base = File.expand_path("../", __FILE__) base = File.expand_path("../", __FILE__)
filepaths = Dir["*.rb", base: base] - %w[unescape_test.rb] filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb unescape_test.rb]
filepaths.each do |relative| filepaths.each do |relative|
define_method("test_newline_flags_#{relative}") do define_method("test_newline_flags_#{relative}") do