[PRISM] Alias CP51932 to EUC-JP

This commit is contained in:
Kevin Newton 2023-11-29 12:53:42 -05:00
Родитель 9ba92327f2
Коммит 7b5bb978fb
4 изменённых файлов: 13 добавлений и 61 удалений

Просмотреть файл

@ -88,7 +88,6 @@ Gem::Specification.new do |spec|
"src/enc/pm_big5.c",
"src/enc/pm_cp949.c",
"src/enc/pm_cp950.c",
"src/enc/pm_cp51932.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",
"src/enc/pm_shift_jis.c",

Просмотреть файл

@ -1,57 +0,0 @@
#include "prism/enc/pm_encoding.h"
/**
 * Report how many bytes the character starting at `b` occupies.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at `b`.
 * @return 1 for a byte below 0x80, 2 for a recognized two-byte sequence, and
 *     0 when the bytes do not form a recognized character.
 */
static size_t
pm_encoding_cp51932_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // Anything below 0x80 stands on its own as a single byte.
    if (lead < 0x80) return 1;

    // Every remaining form needs a trailing byte, so bail out if there is
    // not one available to read.
    if (n <= 1) return 0;

    // The lead byte must be 0x8e or fall within 0xa1..0xfe.
    if (lead != 0x8e && (lead < 0xa1 || lead > 0xfe)) return 0;

    // The trailing byte must fall within 0xa1..0xfe.
    const uint8_t trail = b[1];
    return (trail >= 0xa1 && trail <= 0xfe) ? 2 : 0;
}
/**
 * Alphabetic check for the character starting at `b`.
 *
 * Only characters that pm_encoding_cp51932_char_width reports as one byte
 * wide are forwarded to pm_encoding_ascii_alpha_char; every other input
 * yields 0.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at `b`.
 * @return The result of pm_encoding_ascii_alpha_char for single-byte
 *     characters, 0 otherwise.
 */
static size_t
pm_encoding_cp51932_alpha_char(const uint8_t *b, ptrdiff_t n) {
    // Two-byte and unrecognized sequences are never treated as alphabetic.
    if (pm_encoding_cp51932_char_width(b, n) != 1) return 0;

    return pm_encoding_ascii_alpha_char(b, n);
}
/**
 * Alphanumeric check for the character starting at `b`.
 *
 * The decision is delegated to pm_encoding_ascii_alnum_char, but only when
 * pm_encoding_cp51932_char_width says the character is a single byte.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at `b`.
 * @return The result of pm_encoding_ascii_alnum_char for single-byte
 *     characters, 0 otherwise.
 */
static size_t
pm_encoding_cp51932_alnum_char(const uint8_t *b, ptrdiff_t n) {
    const size_t width = pm_encoding_cp51932_char_width(b, n);

    return (width == 1) ? pm_encoding_ascii_alnum_char(b, n) : 0;
}
/**
 * Uppercase check for the character starting at `b`.
 *
 * Single-byte characters (as measured by pm_encoding_cp51932_char_width) are
 * handed to pm_encoding_ascii_isupper_char; anything else is reported as not
 * uppercase.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at `b`.
 * @return The result of pm_encoding_ascii_isupper_char for single-byte
 *     characters, false otherwise.
 */
static bool
pm_encoding_cp51932_isupper_char(const uint8_t *b, ptrdiff_t n) {
    const size_t width = pm_encoding_cp51932_char_width(b, n);

    // Anything that is not exactly one byte wide cannot be uppercase.
    if (width != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}
/**
 * Encoding descriptor for cp51932, wiring up the cp51932-specific width and
 * character classification callbacks.
 */
pm_encoding_t pm_encoding_cp51932 = {
    .name = "cp51932",
    .multibyte = true,

    // Callback used to measure a character.
    .char_width = pm_encoding_cp51932_char_width,

    // Callbacks used to classify a character.
    .alpha_char = pm_encoding_cp51932_alpha_char,
    .alnum_char = pm_encoding_cp51932_alnum_char,
    .isupper_char = pm_encoding_cp51932_isupper_char
};

Просмотреть файл

@ -67,3 +67,13 @@ pm_encoding_t pm_encoding_euc_jp = {
.isupper_char = pm_encoding_euc_jp_isupper_char,
.multibyte = true
};
/**
 * Encoding descriptor for CP51932. It carries its own name but borrows every
 * callback from the EUC-JP implementation.
 */
pm_encoding_t pm_encoding_cp51932 = {
    .name = "CP51932",
    .multibyte = true,

    // Character measurement is shared with EUC-JP.
    .char_width = pm_encoding_euc_jp_char_width,

    // Character classification is shared with EUC-JP.
    .alpha_char = pm_encoding_euc_jp_alpha_char,
    .alnum_char = pm_encoding_euc_jp_alnum_char,
    .isupper_char = pm_encoding_euc_jp_isupper_char
};

Просмотреть файл

@ -71,14 +71,13 @@ module Prism
Encoding::Big5_UAO => 0x00...0x10000,
Encoding::CP949 => 0x00...0x10000,
Encoding::CP950 => 0x00...0x10000,
Encoding::CP51932 => 0x00...0x10000,
Encoding::GBK => 0x00...0x10000,
Encoding::MACJAPANESE => 0x00...0x10000,
Encoding::Shift_JIS => 0x00...0x10000,
Encoding::SJIS_DoCoMo => 0x00...0x10000,
Encoding::SJIS_KDDI => 0x00...0x10000,
Encoding::SJIS_SoftBank => 0x00...0x10000,
Encoding::Windows_31J => 0x00...0x10000
Encoding::Windows_31J => 0x00...0x10000,
}
# By default we don't test every codepoint in these encodings because they
@ -86,7 +85,8 @@ module Prism
# suite.
if ENV["PRISM_TEST_ALL_ENCODINGS"]
encodings.merge!(
Encoding::EUC_JP => 0x00...0x1000000,
Encoding::CP51932 => [*(0x00...0x10000), *(0x00...0x10000).map { |bytes| bytes | 0x8F0000 }],
Encoding::EUC_JP => [*(0x00...0x10000), *(0x00...0x10000).map { |bytes| bytes | 0x8F0000 }],
Encoding::UTF_8 => 0x00...0x110000,
Encoding::UTF8_MAC => 0x00...0x110000,
Encoding::UTF8_DoCoMo => 0x00...0x110000,