ruby/prism/encoding.h

/**
 * @file encoding.h
 *
 * The encoding interface and implementations used by the parser.
 */
#ifndef PRISM_ENCODING_H
#define PRISM_ENCODING_H

#include "prism/defines.h"
#include "prism/util/pm_strncasecmp.h"

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/**
 * This struct defines the functions necessary to implement the encoding
 * interface so we can determine how many bytes the subsequent character takes.
 * The size-returning callbacks (char_width, alpha_char, and alnum_char) should
 * return the number of bytes, or 0 if the next bytes are invalid for the
 * encoding and type. The isupper_char callback returns a boolean instead.
 *
 * Every callback receives a pointer to the bytes to inspect and the number of
 * bytes that may be read from that pointer.
 */
typedef struct {
    /**
     * Return the number of bytes that the next character takes if it is valid
     * in the encoding. Does not read more than n bytes. It is assumed that n is
     * at least 1.
     */
    size_t (*char_width)(const uint8_t *b, ptrdiff_t n);

    /**
     * Return the number of bytes that the next character takes if it is valid
     * in the encoding and is alphabetical. Does not read more than n bytes. It
     * is assumed that n is at least 1.
     */
    size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);

    /**
     * Return the number of bytes that the next character takes if it is valid
     * in the encoding and is alphanumeric. Does not read more than n bytes. It
     * is assumed that n is at least 1.
     */
    size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);

    /**
     * Return true if the next character is valid in the encoding and is an
     * uppercase character. Does not read more than n bytes. It is assumed that
     * n is at least 1.
     */
    bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);

    /**
     * The name of the encoding. This should correspond to a value that can be
     * passed to Encoding.find in Ruby.
     */
    const char *name;

    /**
     * True if the encoding is a multibyte encoding, false otherwise. This is a
     * plain flag, not a callback.
     */
    bool multibyte;
} pm_encoding_t;

/**
 * All of the lookup tables use the first bit of each embedded byte to indicate
 * whether the codepoint is alphabetical.
 *
 * The expansion is parenthesized so that the flag keeps its value no matter
 * which operators surround the macro at the point of use.
 */
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)

/**
 * All of the lookup tables use the second bit of each embedded byte to indicate
 * whether the codepoint is alphanumeric.
 *
 * The expansion is parenthesized so that the flag keeps its value no matter
 * which operators surround the macro at the point of use.
 */
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)

/**
 * All of the lookup tables use the third bit of each embedded byte to indicate
 * whether the codepoint is uppercase.
 *
 * The expansion is parenthesized so that the flag keeps its value no matter
 * which operators surround the macro at the point of use.
 */
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)

/**
 * Return the size in bytes of the next character in the UTF-8 encoding.
 *
 * @param b The bytes to read.
 * @param n The number of bytes that can be read.
 * @returns The number of bytes that the next character takes if it is valid in
 *     the encoding, or 0 if it is not.
 */
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);

/**
 * Return the size of the next character in the UTF-8 encoding if it is an
 * alphabetical character.
 *
 * @param b The bytes to read.
 * @param n The number of bytes that can be read.
 * @returns The number of bytes that the next character takes if it is valid in
 *     the encoding and is alphabetical, or 0 if it is not.
 */
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);

/**
 * Return the size of the next character in the UTF-8 encoding if it is an
 * alphanumeric character.
 *
 * @param b The bytes to read.
 * @param n The number of bytes that can be read.
 * @returns The number of bytes that the next character takes if it is valid in
 *     the encoding and is alphanumeric, or 0 if it is not.
 */
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);

/**
 * Return true if the next character in the UTF-8 encoding is an uppercase
 * character.
 *
 * @param b The bytes to read.
 * @param n The number of bytes that can be read.
 * @returns True if the next character is valid in the encoding and is an
 *     uppercase character, or false if it is not.
 */
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);

/**
 * This lookup table is referenced in both the UTF-8 encoding file and the
 * parser directly in order to speed up the default encoding processing. It is
 * used to indicate whether a character is alphabetical, alphanumeric, or
 * uppercase in unicode mappings.
 *
 * Each entry is a combination of the PRISM_ENCODING_ALPHABETIC_BIT,
 * PRISM_ENCODING_ALPHANUMERIC_BIT, and PRISM_ENCODING_UPPERCASE_BIT flags.
 * NOTE(review): the 256 entries are presumably indexed by byte/codepoint
 * value; confirm against the table's definition.
 */
extern const uint8_t pm_encoding_unicode_table[256];

/**
 * These are all of the encodings that prism supports. The enumerators are used
 * as indices into the pm_encodings table (see the PM_ENCODING_*_ENTRY macros),
 * and PM_ENCODING_MAXIMUM gives that table its length.
 */
typedef enum {
    // These encodings are always available, regardless of build options.
    PM_ENCODING_UTF_8 = 0,
    PM_ENCODING_US_ASCII,
    PM_ENCODING_ASCII_8BIT,
    PM_ENCODING_EUC_JP,
    PM_ENCODING_WINDOWS_31J,

// We optionally support excluding the full set of encodings to only support the
// minimum necessary to process Ruby code without encoding comments.
#ifndef PRISM_ENCODING_EXCLUDE_FULL
    PM_ENCODING_BIG5,
    PM_ENCODING_BIG5_HKSCS,
    PM_ENCODING_BIG5_UAO,
    PM_ENCODING_CESU_8,
    PM_ENCODING_CP51932,
    PM_ENCODING_CP850,
    PM_ENCODING_CP852,
    PM_ENCODING_CP855,
    PM_ENCODING_CP949,
    PM_ENCODING_CP950,
    PM_ENCODING_CP951,
    PM_ENCODING_EMACS_MULE,
    PM_ENCODING_EUC_JP_MS,
    PM_ENCODING_EUC_JIS_2004,
    PM_ENCODING_EUC_KR,
    PM_ENCODING_EUC_TW,
    PM_ENCODING_GB12345,
    PM_ENCODING_GB18030,
    PM_ENCODING_GB1988,
    PM_ENCODING_GB2312,
    PM_ENCODING_GBK,
    PM_ENCODING_IBM437,
    PM_ENCODING_IBM720,
    PM_ENCODING_IBM737,
    PM_ENCODING_IBM775,
    PM_ENCODING_IBM852,
    PM_ENCODING_IBM855,
    PM_ENCODING_IBM857,
    PM_ENCODING_IBM860,
    PM_ENCODING_IBM861,
    PM_ENCODING_IBM862,
    PM_ENCODING_IBM863,
    PM_ENCODING_IBM864,
    PM_ENCODING_IBM865,
    PM_ENCODING_IBM866,
    PM_ENCODING_IBM869,
    PM_ENCODING_ISO_8859_1,
    PM_ENCODING_ISO_8859_2,
    PM_ENCODING_ISO_8859_3,
    PM_ENCODING_ISO_8859_4,
    PM_ENCODING_ISO_8859_5,
    PM_ENCODING_ISO_8859_6,
    PM_ENCODING_ISO_8859_7,
    PM_ENCODING_ISO_8859_8,
    PM_ENCODING_ISO_8859_9,
    PM_ENCODING_ISO_8859_10,
    PM_ENCODING_ISO_8859_11,
    PM_ENCODING_ISO_8859_13,
    PM_ENCODING_ISO_8859_14,
    PM_ENCODING_ISO_8859_15,
    PM_ENCODING_ISO_8859_16,
    PM_ENCODING_KOI8_R,
    PM_ENCODING_KOI8_U,
    PM_ENCODING_MAC_CENT_EURO,
    PM_ENCODING_MAC_CROATIAN,
    PM_ENCODING_MAC_CYRILLIC,
    PM_ENCODING_MAC_GREEK,
    PM_ENCODING_MAC_ICELAND,
    PM_ENCODING_MAC_JAPANESE,
    PM_ENCODING_MAC_ROMAN,
    PM_ENCODING_MAC_ROMANIA,
    PM_ENCODING_MAC_THAI,
    PM_ENCODING_MAC_TURKISH,
    PM_ENCODING_MAC_UKRAINE,
    PM_ENCODING_SHIFT_JIS,
    PM_ENCODING_SJIS_DOCOMO,
    PM_ENCODING_SJIS_KDDI,
    PM_ENCODING_SJIS_SOFTBANK,
    PM_ENCODING_STATELESS_ISO_2022_JP,
    PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
    PM_ENCODING_TIS_620,
    PM_ENCODING_UTF8_MAC,
    PM_ENCODING_UTF8_DOCOMO,
    PM_ENCODING_UTF8_KDDI,
    PM_ENCODING_UTF8_SOFTBANK,
    PM_ENCODING_WINDOWS_1250,
    PM_ENCODING_WINDOWS_1251,
    PM_ENCODING_WINDOWS_1252,
    PM_ENCODING_WINDOWS_1253,
    PM_ENCODING_WINDOWS_1254,
    PM_ENCODING_WINDOWS_1255,
    PM_ENCODING_WINDOWS_1256,
    PM_ENCODING_WINDOWS_1257,
    PM_ENCODING_WINDOWS_1258,
    PM_ENCODING_WINDOWS_874,
#endif

    // Not an encoding itself: this is the number of encodings listed above,
    // and it is used as the length of the pm_encodings table.
    PM_ENCODING_MAXIMUM
} pm_encoding_type_t;

/**
 * This is the table of all of the encodings that prism supports. It holds
 * PM_ENCODING_MAXIMUM entries and is indexed by pm_encoding_type_t values, as
 * the PM_ENCODING_*_ENTRY macros do.
 */
extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];

/**
 * A pointer to the UTF-8 entry in the pm_encodings table. This is the default
 * encoding; we need a reference to it to quickly create parsers.
 */
#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])

/**
 * A pointer to the US-ASCII entry in the pm_encodings table. We need a
 * reference to it to be able to compare against it when a string is being
 * created because it could possibly need to fall back to ASCII-8BIT.
 */
#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])

/**
 * A pointer to the ASCII-8BIT entry in the pm_encodings table. We need a
 * reference to it so that pm_strpbrk can compare against it because invalid
 * multibyte characters are not a thing in this encoding. It is also needed for
 * handling Regexp encoding flags.
 */
#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])

/**
 * A pointer to the EUC-JP entry in the pm_encodings table. We need a reference
 * to it to quickly process regular expression modifiers.
 */
#define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])

/**
 * A pointer to the Windows-31J entry in the pm_encodings table. We need a
 * reference to it to quickly process regular expression modifiers.
 */
#define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])

/**
 * Parse the given name of an encoding and return a pointer to the corresponding
 * encoding struct if one can be found, otherwise return NULL.
 *
 * @param start A pointer to the first byte of the name.
 * @param end A pointer to the last byte of the name. NOTE(review): confirm
 *     against the implementation whether this is the final byte itself or one
 *     past it.
 * @returns A pointer to the encoding struct if one is found, otherwise NULL.
 */
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);

#endif
[ruby/prism] Last remaining missing C comments https://github.com/ruby/prism/commit/e327449db6 2023-10-31 20:26:31 +03:00			`/**`
[PRISM] Consolidate prism encoding files 2023-11-30 19:36:10 +03:00			`* @file encoding.h`
[ruby/prism] Last remaining missing C comments https://github.com/ruby/prism/commit/e327449db6 2023-10-31 20:26:31 +03:00			`*`
			`* The encoding interface and implementations used by the parser.`
			`*/`
Sync to prism rename commits 2023-09-27 19:24:48 +03:00			`#ifndef PRISM_ENCODING_H`
			`#define PRISM_ENCODING_H`
[Feature #19741] Sync all files in yarp This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following: * Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp * Sync all ruby/yarp/src/ files to ruby/ruby/yarp/ * Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp 2023-06-20 18:53:02 +03:00
Sync to prism rename commits 2023-09-27 19:24:48 +03:00			`#include "prism/defines.h"`
[ruby/prism] Do not expose encodings that do not need to be exposed https://github.com/ruby/prism/commit/c52c7f37ea 2023-11-30 20:00:44 +03:00			`#include "prism/util/pm_strncasecmp.h"`
[Feature #19741] Sync all files in yarp This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following: * Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp * Sync all ruby/yarp/src/ files to ruby/ruby/yarp/ * Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp 2023-06-20 18:53:02 +03:00
Resync YARP 2023-08-15 20:00:54 +03:00			`#include <assert.h>`
[Feature #19741] Sync all files in yarp This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following: * Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp * Sync all ruby/yarp/src/ files to ruby/ruby/yarp/ * Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp 2023-06-20 18:53:02 +03:00			`#include <stdbool.h>`
			`#include <stddef.h>`
			`#include <stdint.h>`

[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00			`/**`
			`* This struct defines the functions necessary to implement the encoding`
			`* interface so we can determine how many bytes the subsequent character takes.`
			`* Each callback should return the number of bytes, or 0 if the next bytes are`
			`* invalid for the encoding and type.`
			`*/`
[Feature #19741] Sync all files in yarp This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following: * Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp * Sync all ruby/yarp/src/ files to ruby/ruby/yarp/ * Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp 2023-06-20 18:53:02 +03:00			`typedef struct {`
[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00			`/**`
			`* Return the number of bytes that the next character takes if it is valid`
			`* in the encoding. Does not read more than n bytes. It is assumed that n is`
			`* at least 1.`
			`*/`
[ruby/yarp] Switch from handling const char * to const uint8_t * https://github.com/ruby/yarp/commit/465e7bb0a9 2023-08-29 17:48:20 +03:00			`size_t (*char_width)(const uint8_t *b, ptrdiff_t n);`
Manual YARP resync 2023-06-30 21:30:24 +03:00
[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00			`/**`
			`* Return the number of bytes that the next character takes if it is valid`
			`* in the encoding and is alphabetical. Does not read more than n bytes. It`
			`* is assumed that n is at least 1.`
			`*/`
[ruby/yarp] Switch from handling const char * to const uint8_t * https://github.com/ruby/yarp/commit/465e7bb0a9 2023-08-29 17:48:20 +03:00			`size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);`
Manual YARP resync 2023-06-30 21:30:24 +03:00
[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00			`/**`
			`* Return the number of bytes that the next character takes if it is valid`
			`* in the encoding and is alphanumeric. Does not read more than n bytes. It`
			`* is assumed that n is at least 1.`
			`*/`
[ruby/yarp] Switch from handling const char * to const uint8_t * https://github.com/ruby/yarp/commit/465e7bb0a9 2023-08-29 17:48:20 +03:00			`size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);`
Manual YARP resync 2023-06-30 21:30:24 +03:00
[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00			`/**`
			`* Return true if the next character is valid in the encoding and is an`
			`* uppercase character. Does not read more than n bytes. It is assumed that`
			`* n is at least 1.`
			`*/`
[ruby/yarp] Switch from handling const char * to const uint8_t * https://github.com/ruby/yarp/commit/465e7bb0a9 2023-08-29 17:48:20 +03:00			`bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);`
Manual YARP resync 2023-06-30 21:30:24 +03:00
[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00			`/**`
			`* The name of the encoding. This should correspond to a value that can be`
			`* passed to Encoding.find in Ruby.`
			`*/`
Manual YARP resync 2023-06-30 21:30:24 +03:00			`const char *name;`

[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00			`/**`
			`* Return true if the encoding is a multibyte encoding.`
			`*/`
Manual YARP resync 2023-06-30 21:30:24 +03:00			`bool multibyte;`
Sync to prism rename commits 2023-09-27 19:24:48 +03:00			`} pm_encoding_t;`
[Feature #19741] Sync all files in yarp This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following: * Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp * Sync all ruby/yarp/src/ files to ruby/ruby/yarp/ * Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp 2023-06-20 18:53:02 +03:00
[ruby/prism] Last remaining missing C comments https://github.com/ruby/prism/commit/e327449db6 2023-10-31 20:26:31 +03:00			`/**`
			`* All of the lookup tables use the first bit of each embedded byte to indicate`
			`* whether the codepoint is alphabetical.`
			`*/`
Sync to prism rename commits 2023-09-27 19:24:48 +03:00			`#define PRISM_ENCODING_ALPHABETIC_BIT 1 << 0`
[ruby/prism] Last remaining missing C comments https://github.com/ruby/prism/commit/e327449db6 2023-10-31 20:26:31 +03:00
			`/**`
			`* All of the lookup tables use the second bit of each embedded byte to indicate`
			`* whether the codepoint is alphanumeric.`
			`*/`
Sync to prism rename commits 2023-09-27 19:24:48 +03:00			`#define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1`
[ruby/prism] Last remaining missing C comments https://github.com/ruby/prism/commit/e327449db6 2023-10-31 20:26:31 +03:00
			`/**`
			`* All of the lookup tables use the third bit of each embedded byte to indicate`
			`* whether the codepoint is uppercase.`
			`*/`
Sync to prism rename commits 2023-09-27 19:24:48 +03:00			`#define PRISM_ENCODING_UPPERCASE_BIT 1 << 2`
[Feature #19741] Sync all files in yarp This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following: * Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp * Sync all ruby/yarp/src/ files to ruby/ruby/yarp/ * Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp 2023-06-20 18:53:02 +03:00
[ruby/prism] Always return the character width for char_is_identifier_start() and char_is_identifier_utf8() * This is also faster than calling pm_encoding_utf_8_alpha_char/pm_encoding_utf_8_alnum_char as those compute the character width and do extra checks. https://github.com/ruby/prism/commit/4cb276ac4c 2024-02-01 00:19:36 +03:00			`/**`
			`* Return the size of the next character in the UTF-8 encoding.`
			`*`
			`* @param b The bytes to read.`
			`* @param n The number of bytes that can be read.`
			`* @returns The number of bytes that the next character takes if it is valid in`
			`* the encoding, or 0 if it is not.`
			`*/`
			`size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);`

[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00			`/**`
			`* Return the size of the next character in the UTF-8 encoding if it is an`
			`* alphabetical character.`
			`*`
			`* @param b The bytes to read.`
			`* @param n The number of bytes that can be read.`
			`* @returns The number of bytes that the next character takes if it is valid in`
			`* the encoding, or 0 if it is not.`
			`*/`
Sync to prism rename commits 2023-09-27 19:24:48 +03:00			`size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);`
[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00
			`/**`
			`* Return the size of the next character in the UTF-8 encoding if it is an`
			`* alphanumeric character.`
			`*`
			`* @param b The bytes to read.`
			`* @param n The number of bytes that can be read.`
			`* @returns The number of bytes that the next character takes if it is valid in`
			`* the encoding, or 0 if it is not.`
			`*/`
Sync to prism rename commits 2023-09-27 19:24:48 +03:00			`size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);`
[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00
			`/**`
			`* Return true if the next character in the UTF-8 encoding if it is an uppercase`
			`* character.`
			`*`
			`* @param b The bytes to read.`
			`* @param n The number of bytes that can be read.`
			`* @returns True if the next character is valid in the encoding and is an`
			`* uppercase character, or false if it is not.`
			`*/`
[ruby/prism] Faster lex_identifier https://github.com/ruby/prism/commit/e44a9ae742 2023-10-30 17:47:46 +03:00			`bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);`
[Feature #19741] Sync all files in yarp This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following: * Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp * Sync all ruby/yarp/src/ files to ruby/ruby/yarp/ * Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp 2023-06-20 18:53:02 +03:00
[ruby/prism] Documentation for the encodings https://github.com/ruby/prism/commit/52a0d80a15 2023-10-31 15:54:52 +03:00			`/**`
			`* This lookup table is referenced in both the UTF-8 encoding file and the`
			`* parser directly in order to speed up the default encoding processing. It is`
			`* used to indicate whether a character is alphabetical, alphanumeric, or`
			`* uppercase in unicode mappings.`
			`*/`
Sync to prism rename commits 2023-09-27 19:24:48 +03:00			`extern const uint8_t pm_encoding_unicode_table[256];`
[Feature #19741] Sync all files in yarp This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following: * Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp * Sync all ruby/yarp/src/ files to ruby/ruby/yarp/ * Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp 2023-06-20 18:53:02 +03:00
[ruby/prism] Do not expose encodings that do not need to be exposed https://github.com/ruby/prism/commit/c52c7f37ea 2023-11-30 20:00:44 +03:00			`/**`
[ruby/prism] Update documentation for encodings https://github.com/ruby/prism/commit/18e6df0d4f 2023-12-06 22:26:27 +03:00			`* These are all of the encodings that prism supports.`
[ruby/prism] Do not expose encodings that do not need to be exposed https://github.com/ruby/prism/commit/c52c7f37ea 2023-11-30 20:00:44 +03:00			`*/`
[ruby/prism] Group encodings into a single array https://github.com/ruby/prism/commit/f4b7beadc9 2023-11-30 20:50:49 +03:00			`typedef enum {`
			`PM_ENCODING_UTF_8 = 0,`
[ruby/prism] Provide options for reducing size https://github.com/ruby/prism/commit/592128de4d 2024-03-20 17:08:13 +03:00			`PM_ENCODING_US_ASCII,`
[ruby/prism] Group encodings into a single array https://github.com/ruby/prism/commit/f4b7beadc9 2023-11-30 20:50:49 +03:00			`PM_ENCODING_ASCII_8BIT,`
[ruby/prism] Provide options for reducing size https://github.com/ruby/prism/commit/592128de4d 2024-03-20 17:08:13 +03:00			`PM_ENCODING_EUC_JP,`
			`PM_ENCODING_WINDOWS_31J,`

			`// We optionally support excluding the full set of encodings to only support the`
			`// minimum necessary to process Ruby code without encoding comments.`
			`#ifndef PRISM_ENCODING_EXCLUDE_FULL`
[ruby/prism] Group encodings into a single array https://github.com/ruby/prism/commit/f4b7beadc9 2023-11-30 20:50:49 +03:00			`PM_ENCODING_BIG5,`
			`PM_ENCODING_BIG5_HKSCS,`
			`PM_ENCODING_BIG5_UAO,`
			`PM_ENCODING_CESU_8,`
			`PM_ENCODING_CP51932,`
			`PM_ENCODING_CP850,`
			`PM_ENCODING_CP852,`
			`PM_ENCODING_CP855,`
			`PM_ENCODING_CP949,`
			`PM_ENCODING_CP950,`
			`PM_ENCODING_CP951,`
			`PM_ENCODING_EMACS_MULE,`
			`PM_ENCODING_EUC_JP_MS,`
			`PM_ENCODING_EUC_JIS_2004,`
			`PM_ENCODING_EUC_KR,`
			`PM_ENCODING_EUC_TW,`
			`PM_ENCODING_GB12345,`
			`PM_ENCODING_GB18030,`
			`PM_ENCODING_GB1988,`
			`PM_ENCODING_GB2312,`
			`PM_ENCODING_GBK,`
			`PM_ENCODING_IBM437,`
			`PM_ENCODING_IBM720,`
			`PM_ENCODING_IBM737,`
			`PM_ENCODING_IBM775,`
			`PM_ENCODING_IBM852,`
			`PM_ENCODING_IBM855,`
			`PM_ENCODING_IBM857,`
			`PM_ENCODING_IBM860,`
			`PM_ENCODING_IBM861,`
			`PM_ENCODING_IBM862,`
			`PM_ENCODING_IBM863,`
			`PM_ENCODING_IBM864,`
			`PM_ENCODING_IBM865,`
			`PM_ENCODING_IBM866,`
			`PM_ENCODING_IBM869,`
			`PM_ENCODING_ISO_8859_1,`
			`PM_ENCODING_ISO_8859_2,`
			`PM_ENCODING_ISO_8859_3,`
			`PM_ENCODING_ISO_8859_4,`
			`PM_ENCODING_ISO_8859_5,`
			`PM_ENCODING_ISO_8859_6,`
			`PM_ENCODING_ISO_8859_7,`
			`PM_ENCODING_ISO_8859_8,`
			`PM_ENCODING_ISO_8859_9,`
			`PM_ENCODING_ISO_8859_10,`
			`PM_ENCODING_ISO_8859_11,`
			`PM_ENCODING_ISO_8859_13,`
			`PM_ENCODING_ISO_8859_14,`
			`PM_ENCODING_ISO_8859_15,`
			`PM_ENCODING_ISO_8859_16,`
			`PM_ENCODING_KOI8_R,`
			`PM_ENCODING_KOI8_U,`
			`PM_ENCODING_MAC_CENT_EURO,`
			`PM_ENCODING_MAC_CROATIAN,`
			`PM_ENCODING_MAC_CYRILLIC,`
			`PM_ENCODING_MAC_GREEK,`
			`PM_ENCODING_MAC_ICELAND,`
			`PM_ENCODING_MAC_JAPANESE,`
			`PM_ENCODING_MAC_ROMAN,`
			`PM_ENCODING_MAC_ROMANIA,`
			`PM_ENCODING_MAC_THAI,`
			`PM_ENCODING_MAC_TURKISH,`
			`PM_ENCODING_MAC_UKRAINE,`
			`PM_ENCODING_SHIFT_JIS,`
			`PM_ENCODING_SJIS_DOCOMO,`
			`PM_ENCODING_SJIS_KDDI,`
			`PM_ENCODING_SJIS_SOFTBANK,`
			`PM_ENCODING_STATELESS_ISO_2022_JP,`
			`PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,`
			`PM_ENCODING_TIS_620,`
			`PM_ENCODING_UTF8_MAC,`
			`PM_ENCODING_UTF8_DOCOMO,`
			`PM_ENCODING_UTF8_KDDI,`
			`PM_ENCODING_UTF8_SOFTBANK,`
			`PM_ENCODING_WINDOWS_1250,`
			`PM_ENCODING_WINDOWS_1251,`
			`PM_ENCODING_WINDOWS_1252,`
			`PM_ENCODING_WINDOWS_1253,`
			`PM_ENCODING_WINDOWS_1254,`
			`PM_ENCODING_WINDOWS_1255,`
			`PM_ENCODING_WINDOWS_1256,`
			`PM_ENCODING_WINDOWS_1257,`
			`PM_ENCODING_WINDOWS_1258,`
[ruby/prism] Provide flags for changing encodings https://github.com/ruby/prism/commit/e838eaff6f 2023-12-04 20:51:22 +03:00			`PM_ENCODING_WINDOWS_874,`
[ruby/prism] Provide options for reducing size https://github.com/ruby/prism/commit/592128de4d 2024-03-20 17:08:13 +03:00			`#endif`

[ruby/prism] Provide flags for changing encodings https://github.com/ruby/prism/commit/e838eaff6f 2023-12-04 20:51:22 +03:00			`PM_ENCODING_MAXIMUM`
[ruby/prism] Group encodings into a single array https://github.com/ruby/prism/commit/f4b7beadc9 2023-11-30 20:50:49 +03:00			`} pm_encoding_type_t;`

			`/**`
[ruby/prism] Update documentation for encodings https://github.com/ruby/prism/commit/18e6df0d4f 2023-12-06 22:26:27 +03:00			`* This is the table of all of the encodings that prism supports.`
[ruby/prism] Group encodings into a single array https://github.com/ruby/prism/commit/f4b7beadc9 2023-11-30 20:50:49 +03:00			`*/`
[ruby/prism] Provide flags for changing encodings https://github.com/ruby/prism/commit/e838eaff6f 2023-12-04 20:51:22 +03:00			`extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];`

			`/**`
			`* This is the default UTF-8 encoding. We need a reference to it to quickly`
			`* create parsers.`
			`*/`
			`#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])`

			`/**`
			`* This is the US-ASCII encoding. We need a reference to it to be able to`
			`* compare against it when a string is being created because it could possibly`
			`* need to fall back to ASCII-8BIT.`
			`*/`
			`#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])`
[ruby/prism] Do not expose encodings that do not need to be exposed https://github.com/ruby/prism/commit/c52c7f37ea 2023-11-30 20:00:44 +03:00
[ruby/prism] Validate multibyte characters in strings Check that multibyte characters are valid using pm_strpbrk. We need to add a couple of codepaths to ensure all encodings are covered. Importantly this doesn't check regular expressions, because apparently you're allowed to have invalid multibyte characters inside regular expression comment groups/extended mode. https://github.com/ruby/prism/commit/2857d3e1b5 2024-02-14 01:45:27 +03:00			`/**`
			`* This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk`
			`* can compare against it because invalid multibyte characters are not a thing`
[ruby/prism] Track both the unescaped bytes and source string for a regular expression so we can accurately set its encoding flags. https://github.com/ruby/prism/commit/dc6dd3a926 2024-02-09 00:27:59 +03:00			`* in this encoding. It is also needed for handling Regexp encoding flags.`
[ruby/prism] Validate multibyte characters in strings Check that multibyte characters are valid using pm_strpbrk. We need to add a couple of codepaths to ensure all encodings are covered. Importantly this doesn't check regular expressions, because apparently you're allowed to have invalid multibyte characters inside regular expression comment groups/extended mode. https://github.com/ruby/prism/commit/2857d3e1b5 2024-02-14 01:45:27 +03:00			`*/`
			`#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])`

[ruby/prism] Add initial implementation of `Regexp` validation. https://github.com/ruby/prism/commit/6bf1b8edf0 2024-02-16 05:27:46 +03:00			`/**`
			`* This is the EUC-JP encoding. We need a reference to it to quickly process`
			`* regular expression modifiers.`
			`*/`
			`#define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])`

			`/**`
			`* This is the Windows-31J encoding. We need a reference to it to quickly`
			`* process regular expression modifiers.`
			`*/`
			`#define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])`

[ruby/prism] Do not expose encodings that do not need to be exposed https://github.com/ruby/prism/commit/c52c7f37ea 2023-11-30 20:00:44 +03:00			`/**`
			`* Parse the given name of an encoding and return a pointer to the corresponding`
			`* encoding struct if one can be found, otherwise return NULL.`
			`*`
			`* @param start A pointer to the first byte of the name.`
			`* @param end A pointer to the last byte of the name.`
[ruby/prism] Group encodings into a single array https://github.com/ruby/prism/commit/f4b7beadc9 2023-11-30 20:50:49 +03:00			`* @returns A pointer to the encoding struct if one is found, otherwise NULL.`
[ruby/prism] Do not expose encodings that do not need to be exposed https://github.com/ruby/prism/commit/c52c7f37ea 2023-11-30 20:00:44 +03:00			`*/`
[ruby/prism] Group encodings into a single array https://github.com/ruby/prism/commit/f4b7beadc9 2023-11-30 20:50:49 +03:00			`const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);`
[Feature #19741] Sync all files in yarp This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following: * Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp * Sync all ruby/yarp/src/ files to ruby/ruby/yarp/ * Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp 2023-06-20 18:53:02 +03:00
			`#endif`