зеркало из https://github.com/github/ruby.git
[ruby/prism] Provide options for reducing size
https://github.com/ruby/prism/commit/592128de4d
This commit is contained in:
Родитель
0e8b6c62a4
Коммит
af7bf9e0d8
318
prism/encoding.c
318
prism/encoding.c
|
@ -2358,6 +2358,8 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
|
||||
static pm_unicode_codepoint_t
|
||||
pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
||||
if (b[0] < 0x80) {
|
||||
|
@ -2452,6 +2454,8 @@ pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#undef UNICODE_ALPHA_CODEPOINTS_LENGTH
|
||||
#undef UNICODE_ALNUM_CODEPOINTS_LENGTH
|
||||
#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
|
||||
|
@ -2480,6 +2484,8 @@ static const uint8_t pm_encoding_ascii_table[256] = {
|
|||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
|
||||
};
|
||||
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
|
||||
/**
|
||||
* Each element of the following table contains a bitfield that indicates a
|
||||
* piece of information about the corresponding CP850 character.
|
||||
|
@ -3918,6 +3924,7 @@ PRISM_ENCODING_TABLE(windows_1258)
|
|||
PRISM_ENCODING_TABLE(windows_874)
|
||||
|
||||
#undef PRISM_ENCODING_TABLE
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the size of the next character in the ASCII encoding. This basically
|
||||
|
@ -3975,6 +3982,122 @@ pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_
|
|||
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
|
||||
}
|
||||
|
||||
/**
|
||||
* For a lot of encodings the default is that they are a single byte long no
|
||||
* matter what the codepoint, so this function is shared between them.
|
||||
*/
|
||||
static size_t
|
||||
pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
 * Returns the size of the next character in the EUC-JP encoding, or 0 if a
 * character cannot be decoded from the given bytes.
 *
 * b points at the first byte of the candidate character and n bounds how many
 * bytes may be inspected: b[1] is only read when n > 1 and b[2] only when
 * n > 2. The first byte is read unconditionally.
 */
static size_t
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
    if (*b < 0x80) {
        return 1;
    }

    // These are the double byte characters.
    if ((n > 1) && ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) && (b[1] >= 0xA1 && b[1] <= 0xFE)) {
        return 2;
    }

    // These are the triple byte characters. Both trailing bytes must fall in
    // 0xA1..0xFE; the upper bound of the second byte used to be checked
    // against b[2] by mistake, which let 0xFF through as a second byte.
    if ((n > 2) && (b[0] == 0x8F) && (b[1] >= 0xA1 && b[1] <= 0xFE) && (b[2] >= 0xA1 && b[2] <= 0xFE)) {
        return 3;
    }

    return 0;
}
|
||||
|
||||
/**
 * Returns true if the next character in the EUC-JP encoding is an uppercase
 * character.
 *
 * Single-byte characters are delegated to the ASCII uppercase check. Two-byte
 * characters count as uppercase only for three lead-byte/trail-byte ranges
 * (presumably the full-width Latin, Greek and Cyrillic capitals -- confirm
 * against the JIS X 0208 table). Anything else, including undecodable input,
 * is not uppercase.
 */
static bool
pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
    switch (pm_encoding_euc_jp_char_width(b, n)) {
        case 1:
            return pm_encoding_ascii_isupper_char(b, n);
        case 2: {
            const uint8_t lead = b[0];
            const uint8_t trail = b[1];

            if (lead == 0xA3) return trail >= 0xC1 && trail <= 0xDA;
            if (lead == 0xA6) return trail >= 0xA1 && trail <= 0xB8;
            if (lead == 0xA7) return trail >= 0xA1 && trail <= 0xC1;
            return false;
        }
        default:
            return false;
    }
}
|
||||
|
||||
/**
 * Returns the size of the next character in the Shift_JIS encoding, or 0 if a
 * character cannot be decoded from the given bytes.
 *
 * The second byte is only read when n says more than one byte is available
 * and the first byte is a valid lead byte.
 */
static size_t
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // Single byte characters: everything below 0x80 plus 0xA1..0xDF.
    if (lead < 0x80) {
        return 1;
    }
    if (lead >= 0xA1 && lead <= 0xDF) {
        return 1;
    }

    // Double byte characters need a second byte to look at...
    if (n <= 1) {
        return 0;
    }

    // ...a lead byte in 0x81..0x9F or 0xE0..0xFC...
    if (!((lead >= 0x81 && lead <= 0x9F) || (lead >= 0xE0 && lead <= 0xFC))) {
        return 0;
    }

    // ...and a trail byte in 0x40..0xFC other than 0x7F.
    const uint8_t trail = b[1];
    if (trail < 0x40 || trail > 0xFC || trail == 0x7F) {
        return 0;
    }

    return 2;
}
|
||||
|
||||
/**
 * Returns the size of the next character in the Shift_JIS encoding if it is an
 * alphanumeric character, or 0 otherwise.
 *
 * Anything that is not exactly one byte wide is reported with the width
 * computed by pm_encoding_shift_jis_char_width (so 0 for undecodable input).
 * A single byte at or above 0x80 always counts; a single byte below 0x80
 * counts only if the ASCII alphanumeric check accepts it.
 */
static size_t
pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
    const size_t width = pm_encoding_shift_jis_char_width(b, n);

    if (width != 1) {
        return width;
    }

    if (b[0] >= 0x80) {
        return 1;
    }

    return pm_encoding_ascii_alnum_char(b, n) ? 1 : 0;
}
|
||||
|
||||
/**
 * Returns the size of the next character in the Shift_JIS encoding if it is an
 * alphabetical character, or 0 otherwise.
 *
 * Anything that is not exactly one byte wide is reported with the width
 * computed by pm_encoding_shift_jis_char_width (so 0 for undecodable input).
 * A single byte at or above 0x80 always counts; a single byte below 0x80
 * counts only if the ASCII alphabetical check accepts it.
 */
static size_t
pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
    const size_t width = pm_encoding_shift_jis_char_width(b, n);

    if (width != 1) {
        return width;
    }

    if (b[0] >= 0x80) {
        return 1;
    }

    return pm_encoding_ascii_alpha_char(b, n) ? 1 : 0;
}
|
||||
|
||||
/**
 * Returns true if the next character in the Shift_JIS encoding is an uppercase
 * character.
 *
 * Single-byte characters are delegated to the ASCII uppercase check. Two-byte
 * characters count as uppercase only for three lead-byte/trail-byte ranges
 * (presumably the full-width Latin, Greek and Cyrillic capitals -- confirm
 * against the Shift_JIS table). Undecodable input (width 0) is not uppercase.
 */
static bool
pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
    switch (pm_encoding_shift_jis_char_width(b, n)) {
        case 1:
            return pm_encoding_ascii_isupper_char(b, n);
        case 2: {
            const uint8_t lead = b[0];
            const uint8_t trail = b[1];

            if (lead == 0x82) return trail >= 0x60 && trail <= 0x79;
            if (lead == 0x83) return trail >= 0x9F && trail <= 0xB6;
            if (lead == 0x84) return trail >= 0x40 && trail <= 0x60;
            return false;
        }
        default:
            // The only remaining width is 0, i.e. nothing could be decoded.
            return false;
    }
}
|
||||
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
|
||||
/**
|
||||
* Certain encodings are equivalent to ASCII below 0x80, so it works for our
|
||||
* purposes to have a function here that first checks the bounds and then falls
|
||||
|
@ -3985,15 +4108,6 @@ pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
|
|||
return (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
|
||||
}
|
||||
|
||||
/**
|
||||
* For a lot of encodings the default is that they are a single byte long no
|
||||
* matter what the codepoint, so this function is shared between them.
|
||||
*/
|
||||
static size_t
|
||||
pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the next character in the Big5 encoding, or 0 if a
|
||||
* character cannot be decoded from the given bytes.
|
||||
|
@ -4075,51 +4189,6 @@ pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the next character in the EUC-JP encoding, or 0 if a
|
||||
* character cannot be decoded from the given bytes.
|
||||
*/
|
||||
static size_t
|
||||
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
|
||||
// These are the single byte characters.
|
||||
if (*b < 0x80) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// These are the double byte characters.
|
||||
if ((n > 1) && ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) && (b[1] >= 0xA1 && b[1] <= 0xFE)) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
// These are the triple byte characters.
|
||||
if ((n > 2) && (b[0] == 0x8F) && (b[1] >= 0xA1 && b[2] <= 0xFE) && (b[2] >= 0xA1 && b[2] <= 0xFE)) {
|
||||
return 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the next character in the EUC-JP encoding if it is an
|
||||
* uppercase character.
|
||||
*/
|
||||
static bool
|
||||
pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
||||
size_t width = pm_encoding_euc_jp_char_width(b, n);
|
||||
|
||||
if (width == 1) {
|
||||
return pm_encoding_ascii_isupper_char(b, n);
|
||||
} else if (width == 2) {
|
||||
return (
|
||||
(b[0] == 0xA3 && b[1] >= 0xC1 && b[1] <= 0xDA) ||
|
||||
(b[0] == 0xA6 && b[1] >= 0xA1 && b[1] <= 0xB8) ||
|
||||
(b[0] == 0xA7 && b[1] >= 0xA1 && b[1] <= 0xC1)
|
||||
);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the next character in the EUC-KR encoding, or 0 if a
|
||||
* character cannot be decoded from the given bytes.
|
||||
|
@ -4218,65 +4287,7 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the next character in the Shift_JIS encoding, or 0 if a
|
||||
* character cannot be decoded from the given bytes.
|
||||
*/
|
||||
static size_t
|
||||
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
|
||||
// These are the single byte characters.
|
||||
if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// These are the double byte characters.
|
||||
if ((n > 1) && ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) && (b[1] >= 0x40 && b[1] <= 0xFC && b[1] != 0x7F)) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the next character in the Shift_JIS encoding if it is an
|
||||
* alphanumeric character.
|
||||
*/
|
||||
static size_t
|
||||
pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
||||
size_t width = pm_encoding_shift_jis_char_width(b, n);
|
||||
return width == 1 ? ((b[0] >= 0x80) || pm_encoding_ascii_alnum_char(b, n)) : width;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the next character in the Shift_JIS encoding if it is an
|
||||
* alphabetical character.
|
||||
*/
|
||||
static size_t
|
||||
pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
||||
size_t width = pm_encoding_shift_jis_char_width(b, n);
|
||||
return width == 1 ? ((b[0] >= 0x80) || pm_encoding_ascii_alpha_char(b, n)) : width;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the next character in the Shift_JIS encoding if it is an
|
||||
* uppercase character.
|
||||
*/
|
||||
static bool
|
||||
pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
||||
size_t width = pm_encoding_shift_jis_char_width(b, n);
|
||||
|
||||
if (width == 1) {
|
||||
return pm_encoding_ascii_isupper_char(b, n);
|
||||
} else if (width == 2) {
|
||||
return (
|
||||
((b[0] == 0x82) && (b[1] >= 0x60 && b[1] <= 0x79)) ||
|
||||
((b[0] == 0x83) && (b[1] >= 0x9F && b[1] <= 0xB6)) ||
|
||||
((b[0] == 0x84) && (b[1] >= 0x40 && b[1] <= 0x60))
|
||||
);
|
||||
} else {
|
||||
return width;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* This is the table of all of the encodings that prism supports.
|
||||
|
@ -4290,6 +4301,14 @@ const pm_encoding_t pm_encodings[] = {
|
|||
.isupper_char = pm_encoding_utf_8_isupper_char,
|
||||
.multibyte = true
|
||||
},
|
||||
[PM_ENCODING_US_ASCII] = {
|
||||
.name = "US-ASCII",
|
||||
.char_width = pm_encoding_ascii_char_width,
|
||||
.alnum_char = pm_encoding_ascii_alnum_char,
|
||||
.alpha_char = pm_encoding_ascii_alpha_char,
|
||||
.isupper_char = pm_encoding_ascii_isupper_char,
|
||||
.multibyte = false
|
||||
},
|
||||
[PM_ENCODING_ASCII_8BIT] = {
|
||||
.name = "ASCII-8BIT",
|
||||
.char_width = pm_encoding_single_char_width,
|
||||
|
@ -4298,6 +4317,24 @@ const pm_encoding_t pm_encodings[] = {
|
|||
.isupper_char = pm_encoding_ascii_isupper_char,
|
||||
.multibyte = false
|
||||
},
|
||||
[PM_ENCODING_EUC_JP] = {
|
||||
.name = "EUC-JP",
|
||||
.char_width = pm_encoding_euc_jp_char_width,
|
||||
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
|
||||
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
|
||||
.isupper_char = pm_encoding_euc_jp_isupper_char,
|
||||
.multibyte = true
|
||||
},
|
||||
[PM_ENCODING_WINDOWS_31J] = {
|
||||
.name = "Windows-31J",
|
||||
.char_width = pm_encoding_shift_jis_char_width,
|
||||
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
||||
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
||||
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
||||
.multibyte = true
|
||||
},
|
||||
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
[PM_ENCODING_BIG5] = {
|
||||
.name = "Big5",
|
||||
.char_width = pm_encoding_big5_char_width,
|
||||
|
@ -4394,14 +4431,6 @@ const pm_encoding_t pm_encodings[] = {
|
|||
.isupper_char = pm_encoding_ascii_isupper_char_7bit,
|
||||
.multibyte = true
|
||||
},
|
||||
[PM_ENCODING_EUC_JP] = {
|
||||
.name = "EUC-JP",
|
||||
.char_width = pm_encoding_euc_jp_char_width,
|
||||
.alnum_char = pm_encoding_ascii_alnum_char_7bit,
|
||||
.alpha_char = pm_encoding_ascii_alpha_char_7bit,
|
||||
.isupper_char = pm_encoding_euc_jp_isupper_char,
|
||||
.multibyte = true
|
||||
},
|
||||
[PM_ENCODING_EUC_JP_MS] = {
|
||||
.name = "eucJP-ms",
|
||||
.char_width = pm_encoding_euc_jp_char_width,
|
||||
|
@ -4874,14 +4903,6 @@ const pm_encoding_t pm_encodings[] = {
|
|||
.isupper_char = pm_encoding_tis_620_isupper_char,
|
||||
.multibyte = false
|
||||
},
|
||||
[PM_ENCODING_US_ASCII] = {
|
||||
.name = "US-ASCII",
|
||||
.char_width = pm_encoding_ascii_char_width,
|
||||
.alnum_char = pm_encoding_ascii_alnum_char,
|
||||
.alpha_char = pm_encoding_ascii_alpha_char,
|
||||
.isupper_char = pm_encoding_ascii_isupper_char,
|
||||
.multibyte = false
|
||||
},
|
||||
[PM_ENCODING_UTF8_MAC] = {
|
||||
.name = "UTF8-MAC",
|
||||
.char_width = pm_encoding_utf_8_char_width,
|
||||
|
@ -4986,14 +5007,6 @@ const pm_encoding_t pm_encodings[] = {
|
|||
.isupper_char = pm_encoding_windows_1258_isupper_char,
|
||||
.multibyte = false
|
||||
},
|
||||
[PM_ENCODING_WINDOWS_31J] = {
|
||||
.name = "Windows-31J",
|
||||
.char_width = pm_encoding_shift_jis_char_width,
|
||||
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
||||
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
||||
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
||||
.multibyte = true
|
||||
},
|
||||
[PM_ENCODING_WINDOWS_874] = {
|
||||
.name = "Windows-874",
|
||||
.char_width = pm_encoding_single_char_width,
|
||||
|
@ -5002,6 +5015,7 @@ const pm_encoding_t pm_encodings[] = {
|
|||
.isupper_char = pm_encoding_windows_874_isupper_char,
|
||||
.multibyte = false
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -5016,11 +5030,13 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|||
// UTF-8 can contain extra information at the end about the platform it is
|
||||
// encoded on, such as UTF-8-MAC or UTF-8-UNIX. We'll ignore those suffixes.
|
||||
if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "UTF-8", 5) == 0)) {
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
// We need to explicitly handle UTF-8-HFS, as that one needs to switch
|
||||
// over to being UTF8-MAC.
|
||||
if (width == 9 && (pm_strncasecmp(start + 5, (const uint8_t *) "-HFS", 4) == 0)) {
|
||||
return &pm_encodings[PM_ENCODING_UTF8_MAC];
|
||||
}
|
||||
#endif
|
||||
|
||||
// Otherwise we'll return the default UTF-8 encoding.
|
||||
return PM_ENCODING_UTF_8_ENTRY;
|
||||
|
@ -5040,11 +5056,16 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|||
break;
|
||||
case 'B': case 'b':
|
||||
ENCODING1("BINARY", PM_ENCODING_ASCII_8BIT);
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING1("Big5", PM_ENCODING_BIG5);
|
||||
ENCODING2("Big5-HKSCS", "Big5-HKSCS:2008", PM_ENCODING_BIG5_HKSCS);
|
||||
ENCODING1("Big5-UAO", PM_ENCODING_BIG5_UAO);
|
||||
#endif
|
||||
break;
|
||||
case 'C': case 'c':
|
||||
ENCODING1("CP65001", PM_ENCODING_UTF_8);
|
||||
ENCODING2("CP932", "csWindows31J", PM_ENCODING_WINDOWS_31J);
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING1("CESU-8", PM_ENCODING_CESU_8);
|
||||
ENCODING1("CP437", PM_ENCODING_IBM437);
|
||||
ENCODING1("CP720", PM_ENCODING_IBM720);
|
||||
|
@ -5064,7 +5085,6 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|||
ENCODING1("CP874", PM_ENCODING_WINDOWS_874);
|
||||
ENCODING1("CP878", PM_ENCODING_KOI8_R);
|
||||
ENCODING1("CP863", PM_ENCODING_IBM863);
|
||||
ENCODING2("CP932", "csWindows31J", PM_ENCODING_WINDOWS_31J);
|
||||
ENCODING1("CP936", PM_ENCODING_GBK);
|
||||
ENCODING1("CP949", PM_ENCODING_CP949);
|
||||
ENCODING1("CP950", PM_ENCODING_CP950);
|
||||
|
@ -5079,25 +5099,30 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|||
ENCODING1("CP1257", PM_ENCODING_WINDOWS_1257);
|
||||
ENCODING1("CP1258", PM_ENCODING_WINDOWS_1258);
|
||||
ENCODING1("CP51932", PM_ENCODING_CP51932);
|
||||
ENCODING1("CP65001", PM_ENCODING_UTF_8);
|
||||
#endif
|
||||
break;
|
||||
case 'E': case 'e':
|
||||
ENCODING2("EUC-JP", "eucJP", PM_ENCODING_EUC_JP);
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING2("eucJP-ms", "euc-jp-ms", PM_ENCODING_EUC_JP_MS);
|
||||
ENCODING2("EUC-JIS-2004", "EUC-JISX0213", PM_ENCODING_EUC_JIS_2004);
|
||||
ENCODING2("EUC-KR", "eucKR", PM_ENCODING_EUC_KR);
|
||||
ENCODING2("EUC-CN", "eucCN", PM_ENCODING_GB2312);
|
||||
ENCODING2("EUC-TW", "eucTW", PM_ENCODING_EUC_TW);
|
||||
ENCODING1("Emacs-Mule", PM_ENCODING_EMACS_MULE);
|
||||
#endif
|
||||
break;
|
||||
case 'G': case 'g':
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING1("GBK", PM_ENCODING_GBK);
|
||||
ENCODING1("GB12345", PM_ENCODING_GB12345);
|
||||
ENCODING1("GB18030", PM_ENCODING_GB18030);
|
||||
ENCODING1("GB1988", PM_ENCODING_GB1988);
|
||||
ENCODING1("GB2312", PM_ENCODING_GB2312);
|
||||
#endif
|
||||
break;
|
||||
case 'I': case 'i':
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING1("IBM437", PM_ENCODING_IBM437);
|
||||
ENCODING1("IBM720", PM_ENCODING_IBM720);
|
||||
ENCODING1("IBM737", PM_ENCODING_IBM737);
|
||||
|
@ -5129,12 +5154,16 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|||
ENCODING2("ISO-8859-14", "ISO8859-14", PM_ENCODING_ISO_8859_14);
|
||||
ENCODING2("ISO-8859-15", "ISO8859-15", PM_ENCODING_ISO_8859_15);
|
||||
ENCODING2("ISO-8859-16", "ISO8859-16", PM_ENCODING_ISO_8859_16);
|
||||
#endif
|
||||
break;
|
||||
case 'K': case 'k':
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING1("KOI8-R", PM_ENCODING_KOI8_R);
|
||||
ENCODING1("KOI8-U", PM_ENCODING_KOI8_U);
|
||||
#endif
|
||||
break;
|
||||
case 'M': case 'm':
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING1("macCentEuro", PM_ENCODING_MAC_CENT_EURO);
|
||||
ENCODING1("macCroatian", PM_ENCODING_MAC_CROATIAN);
|
||||
ENCODING1("macCyrillic", PM_ENCODING_MAC_CYRILLIC);
|
||||
|
@ -5147,31 +5176,39 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|||
ENCODING1("macThai", PM_ENCODING_MAC_THAI);
|
||||
ENCODING1("macTurkish", PM_ENCODING_MAC_TURKISH);
|
||||
ENCODING1("macUkraine", PM_ENCODING_MAC_UKRAINE);
|
||||
#endif
|
||||
break;
|
||||
case 'P': case 'p':
|
||||
ENCODING1("PCK", PM_ENCODING_WINDOWS_31J);
|
||||
break;
|
||||
case 'S': case 's':
|
||||
ENCODING1("Shift_JIS", PM_ENCODING_SHIFT_JIS);
|
||||
ENCODING1("SJIS", PM_ENCODING_WINDOWS_31J);
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING1("Shift_JIS", PM_ENCODING_SHIFT_JIS);
|
||||
ENCODING1("SJIS-DoCoMo", PM_ENCODING_SJIS_DOCOMO);
|
||||
ENCODING1("SJIS-KDDI", PM_ENCODING_SJIS_KDDI);
|
||||
ENCODING1("SJIS-SoftBank", PM_ENCODING_SJIS_SOFTBANK);
|
||||
ENCODING1("stateless-ISO-2022-JP", PM_ENCODING_STATELESS_ISO_2022_JP);
|
||||
ENCODING1("stateless-ISO-2022-JP-KDDI", PM_ENCODING_STATELESS_ISO_2022_JP_KDDI);
|
||||
#endif
|
||||
break;
|
||||
case 'T': case 't':
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING1("TIS-620", PM_ENCODING_TIS_620);
|
||||
#endif
|
||||
break;
|
||||
case 'U': case 'u':
|
||||
ENCODING1("US-ASCII", PM_ENCODING_US_ASCII);
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING2("UTF8-MAC", "UTF-8-HFS", PM_ENCODING_UTF8_MAC);
|
||||
ENCODING1("UTF8-DoCoMo", PM_ENCODING_UTF8_DOCOMO);
|
||||
ENCODING1("UTF8-KDDI", PM_ENCODING_UTF8_KDDI);
|
||||
ENCODING1("UTF8-SoftBank", PM_ENCODING_UTF8_SOFTBANK);
|
||||
#endif
|
||||
break;
|
||||
case 'W': case 'w':
|
||||
ENCODING1("Windows-31J", PM_ENCODING_WINDOWS_31J);
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
ENCODING1("Windows-874", PM_ENCODING_WINDOWS_874);
|
||||
ENCODING1("Windows-1250", PM_ENCODING_WINDOWS_1250);
|
||||
ENCODING1("Windows-1251", PM_ENCODING_WINDOWS_1251);
|
||||
|
@ -5182,6 +5219,7 @@ pm_encoding_find(const uint8_t *start, const uint8_t *end) {
|
|||
ENCODING1("Windows-1256", PM_ENCODING_WINDOWS_1256);
|
||||
ENCODING1("Windows-1257", PM_ENCODING_WINDOWS_1257);
|
||||
ENCODING1("Windows-1258", PM_ENCODING_WINDOWS_1258);
|
||||
#endif
|
||||
break;
|
||||
case '6':
|
||||
ENCODING1("646", PM_ENCODING_US_ASCII);
|
||||
|
|
|
@ -135,7 +135,14 @@ extern const uint8_t pm_encoding_unicode_table[256];
|
|||
*/
|
||||
typedef enum {
|
||||
PM_ENCODING_UTF_8 = 0,
|
||||
PM_ENCODING_US_ASCII,
|
||||
PM_ENCODING_ASCII_8BIT,
|
||||
PM_ENCODING_EUC_JP,
|
||||
PM_ENCODING_WINDOWS_31J,
|
||||
|
||||
// We optionally support excluding the full set of encodings to only support the
|
||||
// minimum necessary to process Ruby code without encoding comments.
|
||||
#ifndef PRISM_ENCODING_EXCLUDE_FULL
|
||||
PM_ENCODING_BIG5,
|
||||
PM_ENCODING_BIG5_HKSCS,
|
||||
PM_ENCODING_BIG5_UAO,
|
||||
|
@ -148,7 +155,6 @@ typedef enum {
|
|||
PM_ENCODING_CP950,
|
||||
PM_ENCODING_CP951,
|
||||
PM_ENCODING_EMACS_MULE,
|
||||
PM_ENCODING_EUC_JP,
|
||||
PM_ENCODING_EUC_JP_MS,
|
||||
PM_ENCODING_EUC_JIS_2004,
|
||||
PM_ENCODING_EUC_KR,
|
||||
|
@ -208,7 +214,6 @@ typedef enum {
|
|||
PM_ENCODING_STATELESS_ISO_2022_JP,
|
||||
PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
|
||||
PM_ENCODING_TIS_620,
|
||||
PM_ENCODING_US_ASCII,
|
||||
PM_ENCODING_UTF8_MAC,
|
||||
PM_ENCODING_UTF8_DOCOMO,
|
||||
PM_ENCODING_UTF8_KDDI,
|
||||
|
@ -222,8 +227,9 @@ typedef enum {
|
|||
PM_ENCODING_WINDOWS_1256,
|
||||
PM_ENCODING_WINDOWS_1257,
|
||||
PM_ENCODING_WINDOWS_1258,
|
||||
PM_ENCODING_WINDOWS_31J,
|
||||
PM_ENCODING_WINDOWS_874,
|
||||
#endif
|
||||
|
||||
PM_ENCODING_MAXIMUM
|
||||
} pm_encoding_type_t;
|
||||
|
||||
|
|
|
@ -311,7 +311,7 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|||
pm_options_t options = { 0 };
|
||||
string_options(argc, argv, &input, &options);
|
||||
|
||||
#ifdef PRISM_DEBUG_MODE_BUILD
|
||||
#ifdef PRISM_BUILD_DEBUG
|
||||
size_t length = pm_string_length(&input);
|
||||
char* dup = xmalloc(length);
|
||||
memcpy(dup, pm_string_source(&input), length);
|
||||
|
@ -320,7 +320,7 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|||
|
||||
VALUE value = dump_input(&input, &options);
|
||||
|
||||
#ifdef PRISM_DEBUG_MODE_BUILD
|
||||
#ifdef PRISM_BUILD_DEBUG
|
||||
xfree(dup);
|
||||
#endif
|
||||
|
||||
|
@ -737,7 +737,7 @@ parse(int argc, VALUE *argv, VALUE self) {
|
|||
pm_options_t options = { 0 };
|
||||
string_options(argc, argv, &input, &options);
|
||||
|
||||
#ifdef PRISM_DEBUG_MODE_BUILD
|
||||
#ifdef PRISM_BUILD_DEBUG
|
||||
size_t length = pm_string_length(&input);
|
||||
char* dup = xmalloc(length);
|
||||
memcpy(dup, pm_string_source(&input), length);
|
||||
|
@ -746,7 +746,7 @@ parse(int argc, VALUE *argv, VALUE self) {
|
|||
|
||||
VALUE value = parse_input(&input, &options);
|
||||
|
||||
#ifdef PRISM_DEBUG_MODE_BUILD
|
||||
#ifdef PRISM_BUILD_DEBUG
|
||||
xfree(dup);
|
||||
#endif
|
||||
|
||||
|
|
50
prism/pack.c
50
prism/pack.c
|
@ -1,16 +1,43 @@
|
|||
#include "prism/pack.h"
|
||||
|
||||
// We optionally support parsing String#pack templates. For systems that don't
|
||||
// want or need this functionality, it can be turned off with the
|
||||
// PRISM_EXCLUDE_PACK define.
|
||||
#ifdef PRISM_EXCLUDE_PACK
|
||||
|
||||
void pm_pack_parse(void) {}
|
||||
|
||||
#else
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
|
||||
/**
 * Parse a run of ASCII decimal digits starting at *format and return its
 * value, advancing *format past every digit consumed. If there are no digits
 * the result is 0 and *format is left where it was.
 *
 * If the number does not fit in a uintmax_t, errno is set to ERANGE and
 * UINTMAX_MAX is returned; the remaining digits are still consumed. errno is
 * not touched on success, so callers that want to detect overflow must clear
 * it before calling.
 */
static uintmax_t
strtoumaxc(const char **format) {
    uintmax_t value = 0;
    bool overflow = false;

    while (**format >= '0' && **format <= '9') {
        const uintmax_t digit = (uintmax_t) (**format - '0');

        // value * 10 + digit fits exactly when value <= (MAX - digit) / 10.
        // Checking both the multiply and the add catches inputs such as
        // UINTMAX_MAX + 1, which a multiply-only check lets wrap silently.
        if (!overflow && value <= (UINTMAX_MAX - digit) / 10) {
            value = value * 10 + digit;
        } else {
            overflow = true;
        }

        (*format)++;
    }

    if (overflow) {
        errno = ERANGE;
        return UINTMAX_MAX;
    }

    return value;
}
|
||||
|
||||
PRISM_EXPORTED_FUNCTION pm_pack_result
|
||||
pm_pack_parse(pm_pack_variant variant, const char **format, const char *format_end,
|
||||
pm_pack_type *type, pm_pack_signed *signed_type, pm_pack_endian *endian, pm_pack_size *size,
|
||||
pm_pack_length_type *length_type, uint64_t *length, pm_pack_encoding *encoding) {
|
||||
|
||||
pm_pack_parse(
|
||||
pm_pack_variant variant,
|
||||
const char **format,
|
||||
const char *format_end,
|
||||
pm_pack_type *type,
|
||||
pm_pack_signed *signed_type,
|
||||
pm_pack_endian *endian,
|
||||
pm_pack_size *size,
|
||||
pm_pack_length_type *length_type,
|
||||
uint64_t *length,
|
||||
pm_pack_encoding *encoding
|
||||
) {
|
||||
if (*encoding == PM_PACK_ENCODING_START) {
|
||||
*encoding = PM_PACK_ENCODING_US_ASCII;
|
||||
}
|
||||
|
@ -479,15 +506,4 @@ pm_size_to_native(pm_pack_size size) {
|
|||
}
|
||||
}
|
||||
|
||||
static uintmax_t
|
||||
strtoumaxc(const char **format) {
|
||||
uintmax_t value = 0;
|
||||
while (**format >= '0' && **format <= '9') {
|
||||
if (value > UINTMAX_MAX / 10) {
|
||||
errno = ERANGE;
|
||||
}
|
||||
value = value * 10 + ((uintmax_t) (**format - '0'));
|
||||
(*format)++;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
#endif
|
||||
|
|
11
prism/pack.h
11
prism/pack.h
|
@ -6,6 +6,15 @@
|
|||
#ifndef PRISM_PACK_H
|
||||
#define PRISM_PACK_H
|
||||
|
||||
// We optionally support parsing String#pack templates. For systems that don't
|
||||
// want or need this functionality, it can be turned off with the
|
||||
// PRISM_EXCLUDE_PACK define.
|
||||
#ifdef PRISM_EXCLUDE_PACK
|
||||
|
||||
void pm_pack_parse(void);
|
||||
|
||||
#else
|
||||
|
||||
#include "prism/defines.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
@ -150,3 +159,5 @@ pm_pack_parse(
|
|||
PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -6,6 +6,12 @@
|
|||
#ifndef PRISM_PRETTYPRINT_H
|
||||
#define PRISM_PRETTYPRINT_H
|
||||
|
||||
#ifdef PRISM_EXCLUDE_PRETTYPRINT
|
||||
|
||||
void pm_prettyprint(void);
|
||||
|
||||
#else
|
||||
|
||||
#include "prism/defines.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -24,3 +30,5 @@
|
|||
PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -19316,6 +19316,41 @@ pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse
|
|||
return node;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the source and return true if it parses without errors or warnings.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION bool
|
||||
pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
|
||||
pm_options_t options = { 0 };
|
||||
pm_options_read(&options, data);
|
||||
|
||||
pm_parser_t parser;
|
||||
pm_parser_init(&parser, source, size, &options);
|
||||
|
||||
pm_node_t *node = pm_parse(&parser);
|
||||
pm_node_destroy(&parser, node);
|
||||
|
||||
bool result = parser.error_list.size == 0 && parser.warning_list.size == 0;
|
||||
pm_parser_free(&parser);
|
||||
pm_options_free(&options);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#undef PM_CASE_KEYWORD
|
||||
#undef PM_CASE_OPERATOR
|
||||
#undef PM_CASE_WRITABLE
|
||||
#undef PM_STRING_EMPTY
|
||||
#undef PM_LOCATION_NODE_BASE_VALUE
|
||||
#undef PM_LOCATION_NODE_VALUE
|
||||
#undef PM_LOCATION_NULL_VALUE
|
||||
#undef PM_LOCATION_TOKEN_VALUE
|
||||
|
||||
// We optionally support serializing to a binary string. For systems that don't
|
||||
// want or need this functionality, it can be turned off with the
|
||||
// PRISM_EXCLUDE_SERIALIZATION define.
|
||||
#ifndef PRISM_EXCLUDE_SERIALIZATION
|
||||
|
||||
static inline void
|
||||
pm_serialize_header(pm_buffer_t *buffer) {
|
||||
pm_buffer_append_string(buffer, "PRISM", 5);
|
||||
|
@ -19402,14 +19437,7 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|||
pm_options_free(&options);
|
||||
}
|
||||
|
||||
#undef PM_CASE_KEYWORD
|
||||
#undef PM_CASE_OPERATOR
|
||||
#undef PM_CASE_WRITABLE
|
||||
#undef PM_STRING_EMPTY
|
||||
#undef PM_LOCATION_NODE_BASE_VALUE
|
||||
#undef PM_LOCATION_NODE_VALUE
|
||||
#undef PM_LOCATION_NULL_VALUE
|
||||
#undef PM_LOCATION_TOKEN_VALUE
|
||||
#endif
|
||||
|
||||
/** An error that is going to be formatted into the output. */
|
||||
typedef struct {
|
||||
|
|
|
@ -98,6 +98,11 @@ typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
|
|||
*/
|
||||
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options);
|
||||
|
||||
// We optionally support serializing to a binary string. For systems that don't
|
||||
// want or need this functionality, it can be turned off with the
|
||||
// PRISM_EXCLUDE_SERIALIZATION define.
|
||||
#ifndef PRISM_EXCLUDE_SERIALIZATION
|
||||
|
||||
/**
|
||||
* Parse and serialize the AST represented by the source that is read out of the
|
||||
* given stream into the given buffer.
|
||||
|
@ -185,6 +190,8 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t
|
|||
*/
|
||||
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Parse the source and return true if it parses without errors or warnings.
|
||||
*
|
||||
|
@ -220,6 +227,10 @@ const char * pm_token_type_human(pm_token_type_t token_type);
|
|||
*/
|
||||
PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize);
|
||||
|
||||
// We optionally support dumping to JSON. For systems that don't want or need
|
||||
// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
|
||||
#ifndef PRISM_EXCLUDE_JSON
|
||||
|
||||
/**
|
||||
* Dump JSON to the given buffer.
|
||||
*
|
||||
|
@ -229,6 +240,8 @@ PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser,
|
|||
*/
|
||||
PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @mainpage
|
||||
*
|
||||
|
|
|
@ -247,6 +247,10 @@ pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *nod
|
|||
}
|
||||
}
|
||||
|
||||
// We optionally support dumping to JSON. For systems that don't want or need
|
||||
// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
|
||||
#ifndef PRISM_EXCLUDE_JSON
|
||||
|
||||
static void
|
||||
pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
|
||||
const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
|
||||
|
@ -360,3 +364,5 @@ pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *no
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,15 @@
|
|||
<%# encoding: ASCII -%>
|
||||
#include "prism/prettyprint.h"
|
||||
|
||||
// We optionally support pretty printing nodes. For systems that don't want or
|
||||
// need this functionality, it can be turned off with the
|
||||
// PRISM_EXCLUDE_PRETTYPRINT define.
|
||||
#ifdef PRISM_EXCLUDE_PRETTYPRINT
|
||||
|
||||
void pm_prettyprint(void) {}
|
||||
|
||||
#else
|
||||
|
||||
static inline void
|
||||
prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) {
|
||||
pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line);
|
||||
|
@ -154,3 +163,5 @@ pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_n
|
|||
prettyprint_node(output_buffer, parser, node, &prefix_buffer);
|
||||
pm_buffer_free(&prefix_buffer);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
#include "prism.h"
|
||||
|
||||
// We optionally support serializing to a binary string. For systems that don't
|
||||
// want or need this functionality, it can be turned off with the
|
||||
// PRISM_EXCLUDE_SERIALIZATION define.
|
||||
#ifndef PRISM_EXCLUDE_SERIALIZATION
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
static inline uint32_t
|
||||
|
@ -394,23 +399,4 @@ pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size,
|
|||
pm_options_free(&options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the source and return true if it parses without errors or warnings.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION bool
|
||||
pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
|
||||
pm_options_t options = { 0 };
|
||||
pm_options_read(&options, data);
|
||||
|
||||
pm_parser_t parser;
|
||||
pm_parser_init(&parser, source, size, &options);
|
||||
|
||||
pm_node_t *node = pm_parse(&parser);
|
||||
pm_node_destroy(&parser, node);
|
||||
|
||||
bool result = parser.error_list.size == 0 && parser.warning_list.size == 0;
|
||||
pm_parser_free(&parser);
|
||||
pm_options_free(&options);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -9,10 +9,13 @@ module Prism
|
|||
codepoints_1byte = 0...0x100
|
||||
encodings = {
|
||||
Encoding::ASCII_8BIT => codepoints_1byte,
|
||||
Encoding::US_ASCII => codepoints_1byte,
|
||||
Encoding::Windows_1253 => codepoints_1byte
|
||||
Encoding::US_ASCII => codepoints_1byte
|
||||
}
|
||||
|
||||
if !ENV["PRISM_BUILD_MINIMAL"]
|
||||
encodings[Encoding::Windows_1253] = codepoints_1byte
|
||||
end
|
||||
|
||||
# By default we don't test every codepoint in these encodings because it
|
||||
# takes a very long time.
|
||||
if ENV["PRISM_TEST_ALL_ENCODINGS"]
|
||||
|
@ -205,21 +208,6 @@ module Prism
|
|||
assert_equal Encoding.find("utf-8"), actual
|
||||
end
|
||||
|
||||
# This test may be a little confusing. Basically when we use our strpbrk, it
|
||||
# takes into account the encoding of the file.
|
||||
def test_strpbrk_multibyte
|
||||
result = Prism.parse(<<~RUBY)
|
||||
# encoding: Shift_JIS
|
||||
%w[\x81\x5c]
|
||||
RUBY
|
||||
|
||||
assert(result.errors.empty?)
|
||||
assert_equal(
|
||||
(+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
|
||||
result.value.statements.body.first.elements.first.unescaped
|
||||
)
|
||||
end
|
||||
|
||||
def test_utf_8_variations
|
||||
%w[
|
||||
utf-8-unix
|
||||
|
@ -238,22 +226,39 @@ module Prism
|
|||
assert_equal Encoding.find("ascii-8bit"), encoding
|
||||
end
|
||||
|
||||
def test_slice_encoding
|
||||
slice = Prism.parse("# encoding: Shift_JIS\nア").value.slice
|
||||
assert_equal (+"ア").force_encoding(Encoding::SHIFT_JIS), slice
|
||||
assert_equal Encoding::SHIFT_JIS, slice.encoding
|
||||
end
|
||||
if !ENV["PRISM_BUILD_MINIMAL"]
|
||||
# This test may be a little confusing. Basically when we use our strpbrk,
|
||||
# it takes into account the encoding of the file.
|
||||
def test_strpbrk_multibyte
|
||||
result = Prism.parse(<<~RUBY)
|
||||
# encoding: Shift_JIS
|
||||
%w[\x81\x5c]
|
||||
RUBY
|
||||
|
||||
def test_multibyte_escapes
|
||||
[
|
||||
["'", "'"],
|
||||
["\"", "\""],
|
||||
["`", "`"],
|
||||
["/", "/"],
|
||||
["<<'HERE'\n", "\nHERE"],
|
||||
["<<-HERE\n", "\nHERE"]
|
||||
].each do |opening, closing|
|
||||
assert Prism.parse_success?("# encoding: shift_jis\n'\\\x82\xA0'\n")
|
||||
assert(result.errors.empty?)
|
||||
assert_equal(
|
||||
(+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
|
||||
result.value.statements.body.first.elements.first.unescaped
|
||||
)
|
||||
end
|
||||
|
||||
def test_slice_encoding
|
||||
slice = Prism.parse("# encoding: Shift_JIS\nア").value.slice
|
||||
assert_equal (+"ア").force_encoding(Encoding::SHIFT_JIS), slice
|
||||
assert_equal Encoding::SHIFT_JIS, slice.encoding
|
||||
end
|
||||
|
||||
def test_multibyte_escapes
|
||||
[
|
||||
["'", "'"],
|
||||
["\"", "\""],
|
||||
["`", "`"],
|
||||
["/", "/"],
|
||||
["<<'HERE'\n", "\nHERE"],
|
||||
["<<-HERE\n", "\nHERE"]
|
||||
].each do |opening, closing|
|
||||
assert Prism.parse_success?("# encoding: shift_jis\n'\\\x82\xA0'\n")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
return if ENV["PRISM_BUILD_MINIMAL"]
|
||||
|
||||
require_relative "test_helper"
|
||||
|
||||
module Prism
|
||||
# These tests are simply to exercise snippets found by the fuzzer that caused invalid memory access.
|
||||
# These tests are simply to exercise snippets found by the fuzzer that caused
|
||||
# invalid memory access.
|
||||
class FuzzerTest < TestCase
|
||||
def self.snippet(name, source)
|
||||
define_method(:"test_fuzzer_#{name}") { Prism.dump(source) }
|
||||
|
|
|
@ -17,11 +17,11 @@ module Prism
|
|||
"# -*- \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v -*-",
|
||||
"# -*- foo: bar; encoding: ascii -*-",
|
||||
"# coding \t \r \v : \t \v \r ascii-8bit",
|
||||
"# vim: filetype=ruby, fileencoding=big5, tabsize=3, shiftwidth=3"
|
||||
"# vim: filetype=ruby, fileencoding=windows-31j, tabsize=3, shiftwidth=3"
|
||||
]
|
||||
|
||||
examples.each do |example|
|
||||
define_method(:"test_magic_comment_#{example}") do
|
||||
examples.each.with_index(1) do |example, index|
|
||||
define_method(:"test_magic_comment_#{index}") do
|
||||
assert_magic_comment(example)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -75,19 +75,21 @@ module Prism
|
|||
assert_equal 5, tokens.length
|
||||
end
|
||||
|
||||
def test_dump_file
|
||||
assert_nothing_raised do
|
||||
Prism.dump_file(__FILE__)
|
||||
end
|
||||
if !ENV["PRISM_BUILD_MINIMAL"]
|
||||
def test_dump_file
|
||||
assert_nothing_raised do
|
||||
Prism.dump_file(__FILE__)
|
||||
end
|
||||
|
||||
error = assert_raise Errno::ENOENT do
|
||||
Prism.dump_file("idontexist.rb")
|
||||
end
|
||||
error = assert_raise Errno::ENOENT do
|
||||
Prism.dump_file("idontexist.rb")
|
||||
end
|
||||
|
||||
assert_equal "No such file or directory - idontexist.rb", error.message
|
||||
assert_equal "No such file or directory - idontexist.rb", error.message
|
||||
|
||||
assert_raise TypeError do
|
||||
Prism.dump_file(nil)
|
||||
assert_raise TypeError do
|
||||
Prism.dump_file(nil)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -259,9 +261,11 @@ module Prism
|
|||
warn("Created snapshot at #{snapshot}.")
|
||||
end
|
||||
|
||||
# Next, assert that the value can be serialized and deserialized without
|
||||
# changing the shape of the tree.
|
||||
assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value)
|
||||
if !ENV["PRISM_BUILD_MINIMAL"]
|
||||
# Next, assert that the value can be serialized and deserialized
|
||||
# without changing the shape of the tree.
|
||||
assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value)
|
||||
end
|
||||
|
||||
# Next, check that the location ranges of each node in the tree are a
|
||||
# superset of their respective child nodes.
|
||||
|
@ -318,7 +322,9 @@ module Prism
|
|||
result = Prism.parse(snippet, filepath: relative)
|
||||
assert_empty result.errors
|
||||
|
||||
assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, filepath: relative)).value)
|
||||
if !ENV["PRISM_BUILD_MINIMAL"]
|
||||
assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, filepath: relative)).value)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -4,20 +4,22 @@ require_relative "test_helper"
|
|||
|
||||
module Prism
|
||||
class RubyAPITest < TestCase
|
||||
def test_ruby_api
|
||||
filepath = __FILE__
|
||||
source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
|
||||
if !ENV["PRISM_BUILD_MINIMAL"]
|
||||
def test_ruby_api
|
||||
filepath = __FILE__
|
||||
source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
|
||||
|
||||
assert_equal Prism.lex(source, filepath: filepath).value, Prism.lex_file(filepath).value
|
||||
assert_equal Prism.dump(source, filepath: filepath), Prism.dump_file(filepath)
|
||||
assert_equal Prism.lex(source, filepath: filepath).value, Prism.lex_file(filepath).value
|
||||
assert_equal Prism.dump(source, filepath: filepath), Prism.dump_file(filepath)
|
||||
|
||||
serialized = Prism.dump(source, filepath: filepath)
|
||||
ast1 = Prism.load(source, serialized).value
|
||||
ast2 = Prism.parse(source, filepath: filepath).value
|
||||
ast3 = Prism.parse_file(filepath).value
|
||||
serialized = Prism.dump(source, filepath: filepath)
|
||||
ast1 = Prism.load(source, serialized).value
|
||||
ast2 = Prism.parse(source, filepath: filepath).value
|
||||
ast3 = Prism.parse_file(filepath).value
|
||||
|
||||
assert_equal_nodes ast1, ast2
|
||||
assert_equal_nodes ast2, ast3
|
||||
assert_equal_nodes ast1, ast2
|
||||
assert_equal_nodes ast2, ast3
|
||||
end
|
||||
end
|
||||
|
||||
def test_parse_success?
|
||||
|
|
|
@ -54,7 +54,7 @@ module Prism
|
|||
|
||||
def test_source_encoding
|
||||
assert_equal "#<Encoding:UTF-8>", static_inspect("__ENCODING__")
|
||||
assert_equal "#<Encoding:Shift_JIS>", static_inspect("__ENCODING__", encoding: "Shift_JIS")
|
||||
assert_equal "#<Encoding:Windows-31J>", static_inspect("__ENCODING__", encoding: "Windows-31J")
|
||||
end
|
||||
|
||||
def test_source_file
|
||||
|
|
Загрузка…
Ссылка в новой задаче