[ruby/prism] Big5 HKSCS encoding

https://github.com/ruby/prism/commit/3ca9823eb4
This commit is contained in:
Ryan Garver 2023-11-17 20:35:28 -08:00 коммит произвёл Kevin Newton
Родитель bbf14bbba6
Коммит cd4316a51f
5 изменённых файлов: 58 добавлений и 0 удалений

Просмотреть файл

@ -85,6 +85,7 @@ Gem::Specification.new do |spec|
"lib/prism/visitor.rb",
"src/diagnostic.c",
"src/enc/pm_big5.c",
"src/enc/pm_big5_hkscs.c",
"src/enc/pm_cp51932.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",

54
prism/enc/pm_big5_hkscs.c Normal file
Просмотреть файл

@ -0,0 +1,54 @@
#include "prism/enc/pm_encoding.h"
/**
 * Report how many bytes the character starting at b occupies.
 *
 * @param b Pointer to the first byte of the character. It is dereferenced
 *     unconditionally, so it must point to at least one readable byte.
 * @param n Number of bytes available starting at b. It is only consulted
 *     before the second byte is read.
 * @return 1 for a byte below 0x80, 2 for an accepted lead/trail byte pair, and
 *     0 when the bytes match neither form.
 */
static size_t
pm_encoding_big5_hkscs_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // Anything below 0x80 is a single byte character.
    if (lead < 0x80) {
        return 1;
    }

    // A double byte character needs a second byte to be available and a lead
    // byte within 0x87..0xFE. Bail out before touching b[1] otherwise.
    if (n <= 1 || lead < 0x87 || lead > 0xFE) {
        return 0;
    }

    const uint8_t trail = b[1];

    // The trail byte is accepted in either of two ranges: 0x40..0x7E or
    // 0xA1..0xFE.
    if (trail >= 0x40 && trail <= 0x7E) {
        return 2;
    }
    if (trail >= 0xA1 && trail <= 0xFE) {
        return 2;
    }

    return 0;
}
/**
 * Alphabetic check for the character starting at b.
 *
 * Only characters that pm_encoding_big5_hkscs_char_width reports as a single
 * byte are considered; for those the answer is delegated to
 * pm_encoding_ascii_alpha_char and returned unchanged.
 *
 * @param b Pointer to the first byte of the character.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alpha_char for single byte
 *     characters, 0 for everything else.
 */
static size_t
pm_encoding_big5_hkscs_alpha_char(const uint8_t *b, ptrdiff_t n) {
    // Double byte and unrecognized sequences are never treated as alphabetic.
    if (pm_encoding_big5_hkscs_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
/**
 * Alphanumeric check for the character starting at b.
 *
 * Only characters that pm_encoding_big5_hkscs_char_width reports as a single
 * byte are considered; for those the answer is delegated to
 * pm_encoding_ascii_alnum_char and returned unchanged.
 *
 * @param b Pointer to the first byte of the character.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alnum_char for single byte
 *     characters, 0 for everything else.
 */
static size_t
pm_encoding_big5_hkscs_alnum_char(const uint8_t *b, ptrdiff_t n) {
    const size_t width = pm_encoding_big5_hkscs_char_width(b, n);

    // The ASCII helper is only consulted for single byte characters.
    return (width == 1) ? pm_encoding_ascii_alnum_char(b, n) : 0;
}
/**
 * Uppercase check for the character starting at b.
 *
 * Only characters that pm_encoding_big5_hkscs_char_width reports as a single
 * byte are considered; for those the answer is delegated to
 * pm_encoding_ascii_isupper_char.
 *
 * @param b Pointer to the first byte of the character.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_isupper_char for single byte
 *     characters, false for everything else.
 */
static bool
pm_encoding_big5_hkscs_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // The short-circuit keeps the ASCII helper from being called unless the
    // character is exactly one byte wide.
    return pm_encoding_big5_hkscs_char_width(b, n) == 1
        && pm_encoding_ascii_isupper_char(b, n);
}
/**
 * Big5-HKSCS encoding.
 *
 * Bundles the Big5-HKSCS width and character-class callbacks defined in this
 * file under the name "big5-hkscs".
 */
pm_encoding_t pm_encoding_big5_hkscs = {
.name = "big5-hkscs",
.char_width = pm_encoding_big5_hkscs_char_width,
.alnum_char = pm_encoding_big5_hkscs_alnum_char,
.alpha_char = pm_encoding_big5_hkscs_alpha_char,
.isupper_char = pm_encoding_big5_hkscs_isupper_char,
// Characters may be wider than one byte (char_width can return 2).
.multibyte = true
};

Просмотреть файл

@ -158,6 +158,7 @@ extern const uint8_t pm_encoding_unicode_table[256];
extern pm_encoding_t pm_encoding_ascii;
extern pm_encoding_t pm_encoding_ascii_8bit;
extern pm_encoding_t pm_encoding_big5;
extern pm_encoding_t pm_encoding_big5_hkscs;
extern pm_encoding_t pm_encoding_cp51932;
extern pm_encoding_t pm_encoding_cp850;
extern pm_encoding_t pm_encoding_cp852;

Просмотреть файл

@ -6062,6 +6062,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
case 'B': case 'b':
ENCODING1("BINARY", pm_encoding_ascii_8bit);
ENCODING1("Big5", pm_encoding_big5);
ENCODING1("Big5-HKSCS", pm_encoding_big5_hkscs);
break;
case 'C': case 'c':
ENCODING1("CP437", pm_encoding_ibm437);

Просмотреть файл

@ -57,6 +57,7 @@ module Prism
Encoding::Windows_1258 => 0x00...0x100,
Encoding::Windows_874 => 0x00...0x100,
Encoding::Big5 => 0x00...0x10000,
Encoding::Big5_HKSCS => 0x00...0x10000,
Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,
Encoding::Shift_JIS => 0x00...0x10000,