зеркало из https://github.com/github/ruby.git
[PRISM] Consolidate SJIS encodings
This commit is contained in:
Родитель
219c3c1c09
Коммит
9ba92327f2
|
@@ -86,16 +86,14 @@ Gem::Specification.new do |spec|
|
|||
"lib/prism/visitor.rb",
|
||||
"src/diagnostic.c",
|
||||
"src/enc/pm_big5.c",
|
||||
"src/enc/pm_cp51932.c",
|
||||
"src/enc/pm_cp949.c",
|
||||
"src/enc/pm_cp950.c",
|
||||
"src/enc/pm_cp51932.c",
|
||||
"src/enc/pm_euc_jp.c",
|
||||
"src/enc/pm_gbk.c",
|
||||
"src/enc/pm_mac_japanese.c",
|
||||
"src/enc/pm_shift_jis.c",
|
||||
"src/enc/pm_tables.c",
|
||||
"src/enc/pm_unicode.c",
|
||||
"src/enc/pm_windows_31j.c",
|
||||
"src/node.c",
|
||||
"src/pack.c",
|
||||
"src/prettyprint.c",
|
||||
|
|
|
@@ -213,6 +213,9 @@ extern pm_encoding_t pm_encoding_mac_thai;
|
|||
extern pm_encoding_t pm_encoding_mac_turkish;
|
||||
extern pm_encoding_t pm_encoding_mac_ukraine;
|
||||
extern pm_encoding_t pm_encoding_shift_jis;
|
||||
extern pm_encoding_t pm_encoding_sjis_docomo;
|
||||
extern pm_encoding_t pm_encoding_sjis_kddi;
|
||||
extern pm_encoding_t pm_encoding_sjis_softbank;
|
||||
extern pm_encoding_t pm_encoding_tis_620;
|
||||
extern pm_encoding_t pm_encoding_utf_8;
|
||||
extern pm_encoding_t pm_encoding_utf8_mac;
|
||||
|
|
|
@@ -1,57 +0,0 @@
|
|||
#include "prism/enc/pm_encoding.h"
|
||||
|
||||
static size_t
|
||||
pm_encoding_mac_japanese_char_width(const uint8_t *b, ptrdiff_t n) {
|
||||
// These are the single byte characters.
|
||||
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// These are the double byte characters.
|
||||
if (
|
||||
(n > 1) &&
|
||||
((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
|
||||
(b[1] >= 0x40 && b[1] <= 0xFC)
|
||||
) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
pm_encoding_mac_japanese_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
||||
if (pm_encoding_mac_japanese_char_width(b, n) == 1) {
|
||||
return pm_encoding_ascii_alpha_char(b, n);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
pm_encoding_mac_japanese_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
||||
if (pm_encoding_mac_japanese_char_width(b, n) == 1) {
|
||||
return pm_encoding_ascii_alnum_char(b, n);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
pm_encoding_mac_japanese_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
||||
if (pm_encoding_mac_japanese_char_width(b, n) == 1) {
|
||||
return pm_encoding_ascii_isupper_char(b, n);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/** MacJapanese encoding */
|
||||
pm_encoding_t pm_encoding_mac_japanese = {
|
||||
.name = "MacJapanese",
|
||||
.char_width = pm_encoding_mac_japanese_char_width,
|
||||
.alnum_char = pm_encoding_mac_japanese_alnum_char,
|
||||
.alpha_char = pm_encoding_mac_japanese_alpha_char,
|
||||
.isupper_char = pm_encoding_mac_japanese_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
|
@@ -48,7 +48,57 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|||
|
||||
/** Shift_JIS encoding */
|
||||
pm_encoding_t pm_encoding_shift_jis = {
|
||||
.name = "shift_jis",
|
||||
.name = "Shift_JIS",
|
||||
.char_width = pm_encoding_shift_jis_char_width,
|
||||
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
||||
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
||||
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
||||
/** SJIS-DoCoMo encoding */
|
||||
pm_encoding_t pm_encoding_sjis_docomo = {
|
||||
.name = "SJIS-DoCoMo",
|
||||
.char_width = pm_encoding_shift_jis_char_width,
|
||||
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
||||
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
||||
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
||||
/** SJIS-KDDI encoding */
|
||||
pm_encoding_t pm_encoding_sjis_kddi = {
|
||||
.name = "SJIS-KDDI",
|
||||
.char_width = pm_encoding_shift_jis_char_width,
|
||||
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
||||
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
||||
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
||||
/** SJIS-SoftBank encoding */
|
||||
pm_encoding_t pm_encoding_sjis_softbank = {
|
||||
.name = "SJIS-SoftBank",
|
||||
.char_width = pm_encoding_shift_jis_char_width,
|
||||
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
||||
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
||||
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
||||
/** MacJapanese encoding */
|
||||
pm_encoding_t pm_encoding_mac_japanese = {
|
||||
.name = "MacJapanese",
|
||||
.char_width = pm_encoding_shift_jis_char_width,
|
||||
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
||||
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
||||
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
||||
/** Windows-31J */
|
||||
pm_encoding_t pm_encoding_windows_31j = {
|
||||
.name = "Windows-31J",
|
||||
.char_width = pm_encoding_shift_jis_char_width,
|
||||
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
||||
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
||||
|
|
|
@@ -1,57 +0,0 @@
|
|||
#include "prism/enc/pm_encoding.h"
|
||||
|
||||
static size_t
|
||||
pm_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
|
||||
// These are the single byte characters.
|
||||
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// These are the double byte characters.
|
||||
if (
|
||||
(n > 1) &&
|
||||
((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
|
||||
(b[1] >= 0x40 && b[1] <= 0xFC)
|
||||
) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
pm_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
||||
if (pm_encoding_windows_31j_char_width(b, n) == 1) {
|
||||
return pm_encoding_ascii_alpha_char(b, n);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
pm_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
||||
if (pm_encoding_windows_31j_char_width(b, n) == 1) {
|
||||
return pm_encoding_ascii_alnum_char(b, n);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
pm_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
||||
if (pm_encoding_windows_31j_char_width(b, n) == 1) {
|
||||
return pm_encoding_ascii_isupper_char(b, n);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Windows-31J */
|
||||
pm_encoding_t pm_encoding_windows_31j = {
|
||||
.name = "windows-31j",
|
||||
.char_width = pm_encoding_windows_31j_char_width,
|
||||
.alnum_char = pm_encoding_windows_31j_alnum_char,
|
||||
.alpha_char = pm_encoding_windows_31j_alpha_char,
|
||||
.isupper_char = pm_encoding_windows_31j_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
|
@@ -6317,6 +6317,9 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
|
|||
case 'S': case 's':
|
||||
ENCODING1("Shift_JIS", pm_encoding_shift_jis);
|
||||
ENCODING1("SJIS", pm_encoding_windows_31j);
|
||||
ENCODING1("SJIS-DoCoMo", pm_encoding_sjis_docomo);
|
||||
ENCODING1("SJIS-KDDI", pm_encoding_sjis_kddi);
|
||||
ENCODING1("SJIS-SoftBank", pm_encoding_sjis_softbank);
|
||||
break;
|
||||
case 'T': case 't':
|
||||
ENCODING1("TIS-620", pm_encoding_tis_620);
|
||||
|
|
|
@@ -7,75 +7,78 @@ require_relative "test_helper"
|
|||
module Prism
|
||||
class EncodingTest < TestCase
|
||||
encodings = {
|
||||
Encoding::ASCII => 0x00...0x100,
|
||||
Encoding::ASCII_8BIT => 0x00...0x100,
|
||||
Encoding::CP850 => 0x00...0x100,
|
||||
Encoding::CP852 => 0x00...0x100,
|
||||
Encoding::CP855 => 0x00...0x100,
|
||||
Encoding::GB1988 => 0x00...0x100,
|
||||
Encoding::IBM437 => 0x00...0x100,
|
||||
Encoding::IBM720 => 0x00...0x100,
|
||||
Encoding::IBM737 => 0x00...0x100,
|
||||
Encoding::IBM775 => 0x00...0x100,
|
||||
Encoding::IBM852 => 0x00...0x100,
|
||||
Encoding::IBM855 => 0x00...0x100,
|
||||
Encoding::IBM857 => 0x00...0x100,
|
||||
Encoding::IBM860 => 0x00...0x100,
|
||||
Encoding::IBM861 => 0x00...0x100,
|
||||
Encoding::IBM862 => 0x00...0x100,
|
||||
Encoding::IBM863 => 0x00...0x100,
|
||||
Encoding::IBM864 => 0x00...0x100,
|
||||
Encoding::IBM865 => 0x00...0x100,
|
||||
Encoding::IBM866 => 0x00...0x100,
|
||||
Encoding::IBM869 => 0x00...0x100,
|
||||
Encoding::ISO_8859_1 => 0x00...0x100,
|
||||
Encoding::ISO_8859_2 => 0x00...0x100,
|
||||
Encoding::ISO_8859_3 => 0x00...0x100,
|
||||
Encoding::ISO_8859_4 => 0x00...0x100,
|
||||
Encoding::ISO_8859_5 => 0x00...0x100,
|
||||
Encoding::ISO_8859_6 => 0x00...0x100,
|
||||
Encoding::ISO_8859_7 => 0x00...0x100,
|
||||
Encoding::ISO_8859_8 => 0x00...0x100,
|
||||
Encoding::ISO_8859_9 => 0x00...0x100,
|
||||
Encoding::ISO_8859_10 => 0x00...0x100,
|
||||
Encoding::ISO_8859_11 => 0x00...0x100,
|
||||
Encoding::ISO_8859_13 => 0x00...0x100,
|
||||
Encoding::ISO_8859_14 => 0x00...0x100,
|
||||
Encoding::ISO_8859_15 => 0x00...0x100,
|
||||
Encoding::ISO_8859_16 => 0x00...0x100,
|
||||
Encoding::KOI8_R => 0x00...0x100,
|
||||
Encoding::KOI8_U => 0x00...0x100,
|
||||
Encoding::MACCENTEURO => 0x00...0x100,
|
||||
Encoding::MACCROATIAN => 0x00...0x100,
|
||||
Encoding::MACCYRILLIC => 0x00...0x100,
|
||||
Encoding::MACGREEK => 0x00...0x100,
|
||||
Encoding::MACICELAND => 0x00...0x100,
|
||||
Encoding::MACROMAN => 0x00...0x100,
|
||||
Encoding::MACROMANIA => 0x00...0x100,
|
||||
Encoding::MACTHAI => 0x00...0x100,
|
||||
Encoding::MACTURKISH => 0x00...0x100,
|
||||
Encoding::MACUKRAINE => 0x00...0x100,
|
||||
Encoding::TIS_620 => 0x00...0x100,
|
||||
Encoding::Windows_1250 => 0x00...0x100,
|
||||
Encoding::Windows_1251 => 0x00...0x100,
|
||||
Encoding::Windows_1252 => 0x00...0x100,
|
||||
Encoding::Windows_1253 => 0x00...0x100,
|
||||
Encoding::Windows_1254 => 0x00...0x100,
|
||||
Encoding::Windows_1255 => 0x00...0x100,
|
||||
Encoding::Windows_1256 => 0x00...0x100,
|
||||
Encoding::Windows_1257 => 0x00...0x100,
|
||||
Encoding::Windows_1258 => 0x00...0x100,
|
||||
Encoding::Windows_874 => 0x00...0x100,
|
||||
Encoding::Big5 => 0x00...0x10000,
|
||||
Encoding::Big5_HKSCS => 0x00...0x10000,
|
||||
Encoding::Big5_UAO => 0x00...0x10000,
|
||||
Encoding::CP949 => 0x00...0x10000,
|
||||
Encoding::CP950 => 0x00...0x10000,
|
||||
Encoding::CP51932 => 0x00...0x10000,
|
||||
Encoding::GBK => 0x00...0x10000,
|
||||
Encoding::MACJAPANESE => 0x00...0x10000,
|
||||
Encoding::Shift_JIS => 0x00...0x10000,
|
||||
Encoding::Windows_31J => 0x00...0x10000
|
||||
Encoding::ASCII => 0x00...0x100,
|
||||
Encoding::ASCII_8BIT => 0x00...0x100,
|
||||
Encoding::CP850 => 0x00...0x100,
|
||||
Encoding::CP852 => 0x00...0x100,
|
||||
Encoding::CP855 => 0x00...0x100,
|
||||
Encoding::GB1988 => 0x00...0x100,
|
||||
Encoding::IBM437 => 0x00...0x100,
|
||||
Encoding::IBM720 => 0x00...0x100,
|
||||
Encoding::IBM737 => 0x00...0x100,
|
||||
Encoding::IBM775 => 0x00...0x100,
|
||||
Encoding::IBM852 => 0x00...0x100,
|
||||
Encoding::IBM855 => 0x00...0x100,
|
||||
Encoding::IBM857 => 0x00...0x100,
|
||||
Encoding::IBM860 => 0x00...0x100,
|
||||
Encoding::IBM861 => 0x00...0x100,
|
||||
Encoding::IBM862 => 0x00...0x100,
|
||||
Encoding::IBM863 => 0x00...0x100,
|
||||
Encoding::IBM864 => 0x00...0x100,
|
||||
Encoding::IBM865 => 0x00...0x100,
|
||||
Encoding::IBM866 => 0x00...0x100,
|
||||
Encoding::IBM869 => 0x00...0x100,
|
||||
Encoding::ISO_8859_1 => 0x00...0x100,
|
||||
Encoding::ISO_8859_2 => 0x00...0x100,
|
||||
Encoding::ISO_8859_3 => 0x00...0x100,
|
||||
Encoding::ISO_8859_4 => 0x00...0x100,
|
||||
Encoding::ISO_8859_5 => 0x00...0x100,
|
||||
Encoding::ISO_8859_6 => 0x00...0x100,
|
||||
Encoding::ISO_8859_7 => 0x00...0x100,
|
||||
Encoding::ISO_8859_8 => 0x00...0x100,
|
||||
Encoding::ISO_8859_9 => 0x00...0x100,
|
||||
Encoding::ISO_8859_10 => 0x00...0x100,
|
||||
Encoding::ISO_8859_11 => 0x00...0x100,
|
||||
Encoding::ISO_8859_13 => 0x00...0x100,
|
||||
Encoding::ISO_8859_14 => 0x00...0x100,
|
||||
Encoding::ISO_8859_15 => 0x00...0x100,
|
||||
Encoding::ISO_8859_16 => 0x00...0x100,
|
||||
Encoding::KOI8_R => 0x00...0x100,
|
||||
Encoding::KOI8_U => 0x00...0x100,
|
||||
Encoding::MACCENTEURO => 0x00...0x100,
|
||||
Encoding::MACCROATIAN => 0x00...0x100,
|
||||
Encoding::MACCYRILLIC => 0x00...0x100,
|
||||
Encoding::MACGREEK => 0x00...0x100,
|
||||
Encoding::MACICELAND => 0x00...0x100,
|
||||
Encoding::MACROMAN => 0x00...0x100,
|
||||
Encoding::MACROMANIA => 0x00...0x100,
|
||||
Encoding::MACTHAI => 0x00...0x100,
|
||||
Encoding::MACTURKISH => 0x00...0x100,
|
||||
Encoding::MACUKRAINE => 0x00...0x100,
|
||||
Encoding::TIS_620 => 0x00...0x100,
|
||||
Encoding::Windows_1250 => 0x00...0x100,
|
||||
Encoding::Windows_1251 => 0x00...0x100,
|
||||
Encoding::Windows_1252 => 0x00...0x100,
|
||||
Encoding::Windows_1253 => 0x00...0x100,
|
||||
Encoding::Windows_1254 => 0x00...0x100,
|
||||
Encoding::Windows_1255 => 0x00...0x100,
|
||||
Encoding::Windows_1256 => 0x00...0x100,
|
||||
Encoding::Windows_1257 => 0x00...0x100,
|
||||
Encoding::Windows_1258 => 0x00...0x100,
|
||||
Encoding::Windows_874 => 0x00...0x100,
|
||||
Encoding::Big5 => 0x00...0x10000,
|
||||
Encoding::Big5_HKSCS => 0x00...0x10000,
|
||||
Encoding::Big5_UAO => 0x00...0x10000,
|
||||
Encoding::CP949 => 0x00...0x10000,
|
||||
Encoding::CP950 => 0x00...0x10000,
|
||||
Encoding::CP51932 => 0x00...0x10000,
|
||||
Encoding::GBK => 0x00...0x10000,
|
||||
Encoding::MACJAPANESE => 0x00...0x10000,
|
||||
Encoding::Shift_JIS => 0x00...0x10000,
|
||||
Encoding::SJIS_DoCoMo => 0x00...0x10000,
|
||||
Encoding::SJIS_KDDI => 0x00...0x10000,
|
||||
Encoding::SJIS_SoftBank => 0x00...0x10000,
|
||||
Encoding::Windows_31J => 0x00...0x10000
|
||||
}
|
||||
|
||||
# By default we don't test every codepoint in these encodings because they
|
||||
|
|
Загрузка…
Ссылка в новой задаче