зеркало из https://github.com/github/ruby.git
[ruby/prism] Add cp855 encoding
https://github.com/ruby/prism/commit/8069d143f5
This commit is contained in:
Родитель
5d7e6842b5
Коммит
f60b974393
|
@ -158,9 +158,10 @@ extern const uint8_t pm_encoding_unicode_table[256];
|
|||
extern pm_encoding_t pm_encoding_ascii;
|
||||
extern pm_encoding_t pm_encoding_ascii_8bit;
|
||||
extern pm_encoding_t pm_encoding_big5;
|
||||
extern pm_encoding_t pm_encoding_cp51932;
|
||||
extern pm_encoding_t pm_encoding_cp850;
|
||||
extern pm_encoding_t pm_encoding_cp852;
|
||||
extern pm_encoding_t pm_encoding_cp51932;
|
||||
extern pm_encoding_t pm_encoding_cp855;
|
||||
extern pm_encoding_t pm_encoding_euc_jp;
|
||||
extern pm_encoding_t pm_encoding_gbk;
|
||||
extern pm_encoding_t pm_encoding_iso_8859_1;
|
||||
|
|
|
@ -72,6 +72,30 @@ static uint8_t pm_encoding_cp852_table[256] = {
|
|||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
|
||||
};
|
||||
|
||||
/**
|
||||
* Each element of the following table contains a bitfield that indicates a
|
||||
* piece of information about the corresponding CP855 character.
*
* The table has 256 entries and is indexed by the raw byte value; each source
* row below covers sixteen consecutive bytes (high nibble in the row label,
* low nibble in the column header).
*
* Values used here: 2 for the digits 0x30-0x39, 7 for the uppercase ASCII
* letters 0x41-0x5A, 3 for the lowercase ASCII letters 0x61-0x7A, and 0 for
* every other byte, including the whole 0x80-0xFF range.
*
* NOTE(review): the values are consistent with bit 0 = alphabetic, bit 1 =
* alphanumeric, bit 2 = uppercase, but the bit definitions are not visible
* here - confirm against the encoding header.
* NOTE(review): CP855 presumably assigns letters to bytes >= 0x80; confirm that
* leaving the upper half unflagged is intentional.
|
||||
*/
|
||||
static uint8_t pm_encoding_cp855_table[256] = {
|
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
|
||||
0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
|
||||
0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
|
||||
};
|
||||
|
||||
/**
|
||||
* Each element of the following table contains a bitfield that indicates a
|
||||
* piece of information about the corresponding ISO-8859-1 character.
|
||||
|
@ -739,6 +763,7 @@ pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
|
|||
|
||||
PRISM_ENCODING_TABLE(cp850)
|
||||
PRISM_ENCODING_TABLE(cp852)
|
||||
PRISM_ENCODING_TABLE(cp855)
|
||||
PRISM_ENCODING_TABLE(iso_8859_1)
|
||||
PRISM_ENCODING_TABLE(iso_8859_2)
|
||||
PRISM_ENCODING_TABLE(iso_8859_3)
|
||||
|
@ -807,6 +832,16 @@ pm_encoding_t pm_encoding_cp852 = {
|
|||
.multibyte = false
|
||||
};
|
||||
|
||||
/**
* CP855 encoding descriptor, named "CP855".
*
* char_width is set to pm_encoding_single_char_width and multibyte is false.
* The alnum/alpha/isupper callbacks are the cp855-specific functions,
* presumably generated by PRISM_ENCODING_TABLE(cp855) from
* pm_encoding_cp855_table - confirm against the macro definition.
*/
|
||||
pm_encoding_t pm_encoding_cp855 = {
|
||||
.name = "CP855",
|
||||
.char_width = pm_encoding_single_char_width,
|
||||
.alnum_char = pm_encoding_cp855_alnum_char,
|
||||
.alpha_char = pm_encoding_cp855_alpha_char,
|
||||
.isupper_char = pm_encoding_cp855_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
||||
/** ISO-8859-1 */
|
||||
pm_encoding_t pm_encoding_iso_8859_1 = {
|
||||
.name = "ISO-8859-1",
|
||||
|
|
|
@ -6070,6 +6070,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
|
|||
ENCODING1("Big5", pm_encoding_big5);
|
||||
ENCODING2("CP850", "IBM850", pm_encoding_cp850);
|
||||
ENCODING1("CP852", pm_encoding_cp852);
|
||||
ENCODING1("CP855", pm_encoding_cp855);
|
||||
ENCODING2("GBK", "CP936", pm_encoding_gbk);
|
||||
ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1);
|
||||
ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2);
|
||||
|
|
|
@ -5,46 +5,45 @@ require_relative "test_helper"
|
|||
module Prism
|
||||
class EncodingTest < TestCase
|
||||
[
|
||||
"US-ASCII",
|
||||
"ASCII-8BIT",
|
||||
"Big5",
|
||||
"CP850",
|
||||
"CP852",
|
||||
"CP51932",
|
||||
"EUC-JP",
|
||||
"GBK",
|
||||
"ISO-8859-1",
|
||||
"ISO-8859-2",
|
||||
"ISO-8859-3",
|
||||
"ISO-8859-4",
|
||||
"ISO-8859-5",
|
||||
"ISO-8859-6",
|
||||
"ISO-8859-7",
|
||||
"ISO-8859-8",
|
||||
"ISO-8859-9",
|
||||
"ISO-8859-10",
|
||||
"ISO-8859-11",
|
||||
"ISO-8859-13",
|
||||
"ISO-8859-14",
|
||||
"ISO-8859-15",
|
||||
"ISO-8859-16",
|
||||
"KOI8-R",
|
||||
"Shift_JIS",
|
||||
"UTF-8",
|
||||
"UTF8-MAC",
|
||||
"Windows-1250",
|
||||
"Windows-1251",
|
||||
"Windows-1252",
|
||||
"Windows-1253",
|
||||
"Windows-1254",
|
||||
"Windows-1255",
|
||||
"Windows-1256",
|
||||
"Windows-1257",
|
||||
"Windows-1258",
|
||||
"Windows-31J"
|
||||
].each do |canonical_name|
|
||||
encoding = Encoding.find(canonical_name)
|
||||
|
||||
Encoding::ASCII,
|
||||
Encoding::ASCII_8BIT,
|
||||
Encoding::Big5,
|
||||
Encoding::CP51932,
|
||||
Encoding::CP850,
|
||||
Encoding::CP852,
|
||||
Encoding::CP855,
|
||||
Encoding::EUC_JP,
|
||||
Encoding::GBK,
|
||||
Encoding::ISO_8859_1,
|
||||
Encoding::ISO_8859_2,
|
||||
Encoding::ISO_8859_3,
|
||||
Encoding::ISO_8859_4,
|
||||
Encoding::ISO_8859_5,
|
||||
Encoding::ISO_8859_6,
|
||||
Encoding::ISO_8859_7,
|
||||
Encoding::ISO_8859_8,
|
||||
Encoding::ISO_8859_9,
|
||||
Encoding::ISO_8859_10,
|
||||
Encoding::ISO_8859_11,
|
||||
Encoding::ISO_8859_13,
|
||||
Encoding::ISO_8859_14,
|
||||
Encoding::ISO_8859_15,
|
||||
Encoding::ISO_8859_16,
|
||||
Encoding::KOI8_R,
|
||||
Encoding::Shift_JIS,
|
||||
Encoding::UTF_8,
|
||||
Encoding::UTF8_MAC,
|
||||
Encoding::Windows_1250,
|
||||
Encoding::Windows_1251,
|
||||
Encoding::Windows_1252,
|
||||
Encoding::Windows_1253,
|
||||
Encoding::Windows_1254,
|
||||
Encoding::Windows_1255,
|
||||
Encoding::Windows_1256,
|
||||
Encoding::Windows_1257,
|
||||
Encoding::Windows_1258,
|
||||
Encoding::Windows_31J
|
||||
].each do |encoding|
|
||||
encoding.names.each do |name|
|
||||
# Even though UTF-8-MAC is an alias for UTF8-MAC, CRuby treats it as
|
||||
# UTF-8. So we'll skip this test.
|
||||
|
|
|
@ -7,7 +7,7 @@ return unless defined?(RubyVM::InstructionSequence)
|
|||
module Prism
|
||||
class NewlineTest < TestCase
|
||||
base = File.expand_path("../", __FILE__)
|
||||
filepaths = Dir["*.rb", base: base] - %w[unescape_test.rb]
|
||||
filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb unescape_test.rb]
|
||||
|
||||
filepaths.each do |relative|
|
||||
define_method("test_newline_flags_#{relative}") do
|
||||
|
|
Загрузка…
Ссылка в новой задаче