[ruby/prism] Add other UTF8 encodings

https://github.com/ruby/prism/commit/709fb6e09f
This commit is contained in:
Kevin Newton 2023-11-29 11:18:38 -05:00
Родитель ba1cdadfc8
Коммит 219c3c1c09
4 изменённых файла: 46 добавлений и 6 удалений

Просмотреть файл

@ -216,6 +216,9 @@ extern pm_encoding_t pm_encoding_shift_jis;
extern pm_encoding_t pm_encoding_tis_620;
extern pm_encoding_t pm_encoding_utf_8;
extern pm_encoding_t pm_encoding_utf8_mac;
extern pm_encoding_t pm_encoding_utf8_docomo;
extern pm_encoding_t pm_encoding_utf8_kddi;
extern pm_encoding_t pm_encoding_utf8_softbank;
extern pm_encoding_t pm_encoding_windows_1250;
extern pm_encoding_t pm_encoding_windows_1251;
extern pm_encoding_t pm_encoding_windows_1252;

Просмотреть файл

@ -2350,7 +2350,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
/** UTF-8 */
pm_encoding_t pm_encoding_utf_8 = {
.name = "utf-8",
.name = "UTF-8",
.char_width = pm_encoding_utf_8_char_width,
.alnum_char = pm_encoding_utf_8_alnum_char,
.alpha_char = pm_encoding_utf_8_alpha_char,
@ -2358,9 +2358,39 @@ pm_encoding_t pm_encoding_utf_8 = {
.multibyte = true
};
/**
 * UTF8-MAC encoding.
 *
 * Character width and classification are delegated to the
 * pm_encoding_utf_8_* callbacks, so this descriptor is distinguished from the
 * other UTF-8 based descriptors by its name.
 */
pm_encoding_t pm_encoding_utf8_mac = {
    /*
     * The name is specified exactly once. A second designated initializer for
     * the same member would silently override the first one.
     */
    .name = "UTF8-MAC",
    .char_width = pm_encoding_utf_8_char_width,
    .alnum_char = pm_encoding_utf_8_alnum_char,
    .alpha_char = pm_encoding_utf_8_alpha_char,
    .isupper_char = pm_encoding_utf_8_isupper_char,
    .multibyte = true
};
/**
 * UTF8-DoCoMo encoding.
 *
 * Every callback points at the corresponding pm_encoding_utf_8_* function;
 * the descriptor is marked as multibyte and carries its own name.
 */
pm_encoding_t pm_encoding_utf8_docomo = {
    .char_width = pm_encoding_utf_8_char_width,
    .alnum_char = pm_encoding_utf_8_alnum_char,
    .alpha_char = pm_encoding_utf_8_alpha_char,
    .isupper_char = pm_encoding_utf_8_isupper_char,
    .multibyte = true,
    .name = "UTF8-DoCoMo"
};
/**
 * UTF8-KDDI encoding.
 *
 * Every callback points at the corresponding pm_encoding_utf_8_* function;
 * the descriptor is marked as multibyte and carries its own name.
 */
pm_encoding_t pm_encoding_utf8_kddi = {
    .char_width = pm_encoding_utf_8_char_width,
    .alnum_char = pm_encoding_utf_8_alnum_char,
    .alpha_char = pm_encoding_utf_8_alpha_char,
    .isupper_char = pm_encoding_utf_8_isupper_char,
    .multibyte = true,
    .name = "UTF8-KDDI"
};
/** UTF8-SoftBank */
pm_encoding_t pm_encoding_utf8_softbank = {
.name = "UTF8-SoftBank",
.char_width = pm_encoding_utf_8_char_width,
.alnum_char = pm_encoding_utf_8_alnum_char,
.alpha_char = pm_encoding_utf_8_alpha_char,

Просмотреть файл

@ -6324,6 +6324,9 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
case 'U': case 'u':
ENCODING1("US-ASCII", pm_encoding_ascii);
ENCODING2("UTF8-MAC", "UTF-8-HFS", pm_encoding_utf8_mac);
ENCODING1("UTF8-DoCoMo", pm_encoding_utf8_docomo);
ENCODING1("UTF8-KDDI", pm_encoding_utf8_kddi);
ENCODING1("UTF8-SoftBank", pm_encoding_utf8_softbank);
break;
case 'W': case 'w':
ENCODING1("Windows-31J", pm_encoding_windows_31j);

Просмотреть файл

@ -54,6 +54,7 @@ module Prism
Encoding::MACROMANIA => 0x00...0x100,
Encoding::MACTHAI => 0x00...0x100,
Encoding::MACTURKISH => 0x00...0x100,
Encoding::MACUKRAINE => 0x00...0x100,
Encoding::TIS_620 => 0x00...0x100,
Encoding::Windows_1250 => 0x00...0x100,
Encoding::Windows_1251 => 0x00...0x100,
@ -82,9 +83,12 @@ module Prism
# suite.
if ENV["PRISM_TEST_ALL_ENCODINGS"]
  # Opt-in only: these ranges are far larger than 0x00...0x100. Each encoding
  # is listed exactly once so the argument list parses and no key is
  # duplicated.
  encodings.merge!(
    Encoding::EUC_JP        => 0x00...0x1000000,
    Encoding::UTF_8         => 0x00...0x110000,
    Encoding::UTF8_MAC      => 0x00...0x110000,
    Encoding::UTF8_DoCoMo   => 0x00...0x110000,
    Encoding::UTF8_KDDI     => 0x00...0x110000,
    Encoding::UTF8_SoftBank => 0x00...0x110000
  )
end