зеркало из https://github.com/github/ruby.git
[ruby/prism] Last remaining missing C comments
https://github.com/ruby/prism/commit/e327449db6
This commit is contained in:
Родитель
e745af2f0e
Коммит
690f3bbf5d
|
@ -59,11 +59,11 @@ tokens:
|
|||
- name: CONSTANT
|
||||
comment: "a constant"
|
||||
- name: DOT
|
||||
comment: "."
|
||||
comment: "the . call operator"
|
||||
- name: DOT_DOT
|
||||
comment: ".."
|
||||
comment: "the .. range operator"
|
||||
- name: DOT_DOT_DOT
|
||||
comment: "..."
|
||||
comment: "the ... range operator or forwarding parameter"
|
||||
- name: EMBDOC_BEGIN
|
||||
comment: "=begin"
|
||||
- name: EMBDOC_END
|
||||
|
@ -311,9 +311,9 @@ tokens:
|
|||
- name: UCOLON_COLON
|
||||
comment: "unary ::"
|
||||
- name: UDOT_DOT
|
||||
comment: "unary .."
|
||||
comment: "unary .. operator"
|
||||
- name: UDOT_DOT_DOT
|
||||
comment: "unary ..."
|
||||
comment: "unary ... operator"
|
||||
- name: UMINUS
|
||||
comment: "-@"
|
||||
- name: UMINUS_NUM
|
||||
|
|
|
@ -1,8 +1,14 @@
|
|||
/**
|
||||
* @file defines.h
|
||||
*
|
||||
* Macro definitions used throughout the prism library.
|
||||
*
|
||||
* This file should be included first by any *.h or *.c in prism for consistency
|
||||
* and to ensure that the macros are defined before they are used.
|
||||
*/
|
||||
#ifndef PRISM_DEFINES_H
|
||||
#define PRISM_DEFINES_H
|
||||
|
||||
// This file should be included first by any *.h or *.c in prism.
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
|
@ -10,7 +16,11 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
// PRISM_EXPORTED_FUNCTION
|
||||
/**
|
||||
* By default, we compile with -fvisibility=hidden. When this is enabled, we
|
||||
* need to mark certain functions as being publicly visible. This macro does
|
||||
* that in a compiler-agnostic way.
|
||||
*/
|
||||
#ifndef PRISM_EXPORTED_FUNCTION
|
||||
# ifdef PRISM_EXPORT_SYMBOLS
|
||||
# ifdef _WIN32
|
||||
|
@ -23,7 +33,12 @@
|
|||
# endif
|
||||
#endif
|
||||
|
||||
// PRISM_ATTRIBUTE_FORMAT
|
||||
/**
|
||||
* Certain compilers support specifying that a function accepts variadic
|
||||
* parameters that look like printf format strings to provide a better developer
|
||||
* experience when someone is using the function. This macro does that in a
|
||||
* compiler-agnostic way.
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
|
||||
#elif defined(__clang__)
|
||||
|
@ -32,19 +47,29 @@
|
|||
# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
|
||||
#endif
|
||||
|
||||
// PRISM_ATTRIBUTE_UNUSED
|
||||
/**
|
||||
* GCC will warn if you specify a function or parameter that is unused at
|
||||
* runtime. This macro allows you to mark a function or parameter as unused in a
|
||||
* compiler-agnostic way.
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
# define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
|
||||
#else
|
||||
# define PRISM_ATTRIBUTE_UNUSED
|
||||
#endif
|
||||
|
||||
// inline
|
||||
/**
|
||||
* Old Visual Studio versions do not support the inline keyword, so we need to
|
||||
* define it to be __inline.
|
||||
*/
|
||||
#if defined(_MSC_VER) && !defined(inline)
|
||||
# define inline __inline
|
||||
#endif
|
||||
|
||||
// Windows versions before 2015 use _snprintf
|
||||
/**
|
||||
* Old Visual Studio versions before 2015 do not implement snprintf, but instead
|
||||
* implement _snprintf. We standardize that here.
|
||||
*/
|
||||
#if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
# define snprintf _snprintf
|
||||
#endif
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file diagnostic.h
|
||||
*
|
||||
* A list of diagnostics generated during parsing.
|
||||
*/
|
||||
#ifndef PRISM_DIAGNOSTIC_H
|
||||
#define PRISM_DIAGNOSTIC_H
|
||||
|
||||
|
@ -9,14 +14,21 @@
|
|||
#include <assert.h>
|
||||
|
||||
/**
|
||||
* This struct represents a diagnostic found during parsing.
|
||||
* This struct represents a diagnostic generated during parsing.
|
||||
*
|
||||
* @extends pm_list_node_t
|
||||
*/
|
||||
typedef struct {
|
||||
/** The embedded base node. */
|
||||
pm_list_node_t node;
|
||||
|
||||
/** A pointer to the start of the source that generated the diagnostic. */
|
||||
const uint8_t *start;
|
||||
|
||||
/** A pointer to the end of the source that generated the diagnostic. */
|
||||
const uint8_t *end;
|
||||
|
||||
/** The message associated with the diagnostic. */
|
||||
const char *message;
|
||||
} pm_diagnostic_t;
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_encoding.h
|
||||
*
|
||||
* The encoding interface and implementations used by the parser.
|
||||
*/
|
||||
#ifndef PRISM_ENCODING_H
|
||||
#define PRISM_ENCODING_H
|
||||
|
||||
|
@ -55,10 +60,22 @@ typedef struct {
|
|||
bool multibyte;
|
||||
} pm_encoding_t;
|
||||
|
||||
// These bits define the location of each bit of metadata within the various
|
||||
// lookup tables that are used to determine the properties of a character.
|
||||
/**
|
||||
* All of the lookup tables use the first bit of each embedded byte to indicate
|
||||
* whether the codepoint is alphabetical.
|
||||
*/
|
||||
// Parenthesized so the macro expands as a single operand in any expression.
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)
|
||||
|
||||
/**
|
||||
* All of the lookup tables use the second bit of each embedded byte to indicate
|
||||
* whether the codepoint is alphanumeric.
|
||||
*/
|
||||
// Parenthesized so the macro expands as a single operand in any expression.
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)
|
||||
|
||||
/**
|
||||
* All of the lookup tables use the third bit of each embedded byte to indicate
|
||||
* whether the codepoint is uppercase.
|
||||
*/
|
||||
// Parenthesized so the macro expands as a single operand in any expression.
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file node.h
|
||||
*
|
||||
* Functions related to nodes in the AST.
|
||||
*/
|
||||
#ifndef PRISM_NODE_H
|
||||
#define PRISM_NODE_H
|
||||
|
||||
|
|
77
prism/pack.h
77
prism/pack.h
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pack.h
|
||||
*
|
||||
* A pack template string parser.
|
||||
*/
|
||||
#ifndef PRISM_PACK_H
|
||||
#define PRISM_PACK_H
|
||||
|
||||
|
@ -6,15 +11,18 @@
|
|||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/** The version of the pack template language that we are parsing. */
|
||||
typedef enum pm_pack_version {
|
||||
PM_PACK_VERSION_3_2_0
|
||||
} pm_pack_version;
|
||||
|
||||
/** The type of pack template we are parsing. */
|
||||
typedef enum pm_pack_variant {
|
||||
PM_PACK_VARIANT_PACK,
|
||||
PM_PACK_VARIANT_UNPACK
|
||||
} pm_pack_variant;
|
||||
|
||||
/** A directive within the pack template. */
|
||||
typedef enum pm_pack_type {
|
||||
PM_PACK_SPACE,
|
||||
PM_PACK_COMMENT,
|
||||
|
@ -40,12 +48,14 @@ typedef enum pm_pack_type {
|
|||
PM_PACK_END
|
||||
} pm_pack_type;
|
||||
|
||||
/** The signedness of a pack directive. */
|
||||
typedef enum pm_pack_signed {
|
||||
PM_PACK_UNSIGNED,
|
||||
PM_PACK_SIGNED,
|
||||
PM_PACK_SIGNED_NA
|
||||
} pm_pack_signed;
|
||||
|
||||
/** The endianness of a pack directive. */
|
||||
typedef enum pm_pack_endian {
|
||||
PM_PACK_AGNOSTIC_ENDIAN,
|
||||
PM_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
|
||||
|
@ -54,6 +64,7 @@ typedef enum pm_pack_endian {
|
|||
PM_PACK_ENDIAN_NA
|
||||
} pm_pack_endian;
|
||||
|
||||
/** The size of an integer pack directive. */
|
||||
typedef enum pm_pack_size {
|
||||
PM_PACK_SIZE_SHORT,
|
||||
PM_PACK_SIZE_INT,
|
||||
|
@ -67,6 +78,7 @@ typedef enum pm_pack_size {
|
|||
PM_PACK_SIZE_NA
|
||||
} pm_pack_size;
|
||||
|
||||
/** The type of length of a pack directive. */
|
||||
typedef enum pm_pack_length_type {
|
||||
PM_PACK_LENGTH_FIXED,
|
||||
PM_PACK_LENGTH_MAX,
|
||||
|
@ -74,6 +86,7 @@ typedef enum pm_pack_length_type {
|
|||
PM_PACK_LENGTH_NA
|
||||
} pm_pack_length_type;
|
||||
|
||||
/** The type of encoding for a pack template string. */
|
||||
typedef enum pm_pack_encoding {
|
||||
PM_PACK_ENCODING_START,
|
||||
PM_PACK_ENCODING_ASCII_8BIT,
|
||||
|
@ -81,6 +94,7 @@ typedef enum pm_pack_encoding {
|
|||
PM_PACK_ENCODING_UTF_8
|
||||
} pm_pack_encoding;
|
||||
|
||||
/** The result of parsing a pack template. */
|
||||
typedef enum pm_pack_result {
|
||||
PM_PACK_OK,
|
||||
PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
|
||||
|
@ -90,39 +104,31 @@ typedef enum pm_pack_result {
|
|||
PM_PACK_ERROR_DOUBLE_ENDIAN
|
||||
} pm_pack_result;
|
||||
|
||||
// Parse a single directive from a pack or unpack format string.
|
||||
//
|
||||
// Parameters:
|
||||
// - [in] pm_pack_version version the version of Ruby
|
||||
// - [in] pm_pack_variant variant pack or unpack
|
||||
// - [in out] const char **format the start of the next directive to parse
|
||||
// on calling, and advanced beyond the parsed directive on return, or as
|
||||
// much of it as was consumed until an error was encountered
|
||||
// - [in] const char *format_end the end of the format string
|
||||
// - [out] pm_pack_type *type the type of the directive
|
||||
// - [out] pm_pack_signed *signed_type
|
||||
// whether the value is signed
|
||||
// - [out] pm_pack_endian *endian the endianness of the value
|
||||
// - [out] pm_pack_size *size the size of the value
|
||||
// - [out] pm_pack_length_type *length_type
|
||||
// what kind of length is specified
|
||||
// - [out] size_t *length the length of the directive
|
||||
// - [in out] pm_pack_encoding *encoding
|
||||
// takes the current encoding of the string
|
||||
// which would result from parsing the whole format string, and returns a
|
||||
// possibly changed directive - the encoding should be
|
||||
// PM_PACK_ENCODING_START when pm_pack_parse is called for the first
|
||||
// directive in a format string
|
||||
//
|
||||
// Return:
|
||||
// - PM_PACK_OK on success
|
||||
// - PM_PACK_ERROR_* on error
|
||||
//
|
||||
// Notes:
|
||||
// Consult Ruby documentation for the meaning of directives.
|
||||
/**
|
||||
* Parse a single directive from a pack or unpack format string.
|
||||
*
|
||||
* @param variant (in) pack or unpack
|
||||
* @param format (in, out) the start of the next directive to parse on calling,
|
||||
* and advanced beyond the parsed directive on return, or as much of it as
|
||||
* was consumed until an error was encountered
|
||||
* @param format_end (in) the end of the format string
|
||||
* @param type (out) the type of the directive
|
||||
* @param signed_type (out) whether the value is signed
|
||||
* @param endian (out) the endianness of the value
|
||||
* @param size (out) the size of the value
|
||||
* @param length_type (out) what kind of length is specified
|
||||
* @param length (out) the length of the directive
|
||||
* @param encoding (in, out) takes the current encoding of the string which
|
||||
* would result from parsing the whole format string, and returns a possibly
|
||||
* changed directive - the encoding should be `PM_PACK_ENCODING_START` when
|
||||
* pm_pack_parse is called for the first directive in a format string
|
||||
*
|
||||
* @return `PM_PACK_OK` on success or `PM_PACK_ERROR_*` on error
|
||||
* @note Consult Ruby documentation for the meaning of directives.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION pm_pack_result
|
||||
pm_pack_parse(
|
||||
pm_pack_variant variant_arg,
|
||||
pm_pack_variant variant,
|
||||
const char **format,
|
||||
const char *format_end,
|
||||
pm_pack_type *type,
|
||||
|
@ -134,8 +140,13 @@ pm_pack_parse(
|
|||
pm_pack_encoding *encoding
|
||||
);
|
||||
|
||||
// prism abstracts sizes away from the native system - this converts an abstract
|
||||
// size to a native size.
|
||||
/**
|
||||
* Prism abstracts sizes away from the native system - this converts an abstract
|
||||
* size to a native size.
|
||||
*
|
||||
* @param size The abstract size to convert.
|
||||
* @return The native size.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file parser.h
|
||||
*
|
||||
* The parser used to parse Ruby source.
|
||||
*/
|
||||
#ifndef PRISM_PARSER_H
|
||||
#define PRISM_PARSER_H
|
||||
|
||||
|
@ -84,6 +89,7 @@ typedef enum {
|
|||
* are found as part of a string.
|
||||
*/
|
||||
typedef struct pm_lex_mode {
|
||||
/** The type of this lex mode. */
|
||||
enum {
|
||||
/** This state is used when any given token is being lexed. */
|
||||
PM_LEX_DEFAULT,
|
||||
|
@ -122,6 +128,7 @@ typedef struct pm_lex_mode {
|
|||
PM_LEX_STRING
|
||||
} mode;
|
||||
|
||||
/** The data associated with this type of lex mode. */
|
||||
union {
|
||||
struct {
|
||||
/** This keeps track of the nesting level of the list. */
|
||||
|
@ -240,8 +247,9 @@ typedef struct pm_lex_mode {
|
|||
*/
|
||||
#define PM_LEX_STACK_SIZE 4
|
||||
|
||||
// A forward declaration since our error handler struct accepts a parser for
|
||||
// each of its function calls.
|
||||
/**
|
||||
* The parser used to parse Ruby source.
|
||||
*/
|
||||
typedef struct pm_parser pm_parser_t;
|
||||
|
||||
/**
|
||||
|
@ -343,7 +351,10 @@ typedef enum {
|
|||
|
||||
/** This is a node in a linked list of contexts. */
|
||||
typedef struct pm_context_node {
|
||||
/** The context that this node represents. */
|
||||
pm_context_t context;
|
||||
|
||||
/** A pointer to the previous context in the linked list. */
|
||||
struct pm_context_node *prev;
|
||||
} pm_context_node_t;
|
||||
|
||||
|
@ -360,9 +371,16 @@ typedef enum {
|
|||
* @extends pm_list_node_t
|
||||
*/
|
||||
typedef struct pm_comment {
|
||||
/** The embedded base node. */
|
||||
pm_list_node_t node;
|
||||
|
||||
/** A pointer to the start of the comment in the source. */
|
||||
const uint8_t *start;
|
||||
|
||||
/** A pointer to the end of the comment in the source. */
|
||||
const uint8_t *end;
|
||||
|
||||
/** The type of comment that we've found. */
|
||||
pm_comment_type_t type;
|
||||
} pm_comment_t;
|
||||
|
||||
|
@ -373,10 +391,19 @@ typedef struct pm_comment {
|
|||
* @extends pm_list_node_t
|
||||
*/
|
||||
typedef struct {
|
||||
/** The embedded base node. */
|
||||
pm_list_node_t node;
|
||||
|
||||
/** A pointer to the start of the key in the source. */
|
||||
const uint8_t *key_start;
|
||||
|
||||
/** A pointer to the start of the value in the source. */
|
||||
const uint8_t *value_start;
|
||||
|
||||
/** The length of the key in the source. */
|
||||
uint32_t key_length;
|
||||
|
||||
/** The length of the value in the source. */
|
||||
uint32_t value_length;
|
||||
} pm_magic_comment_t;
|
||||
|
||||
|
@ -493,6 +520,7 @@ struct pm_parser {
|
|||
*/
|
||||
pm_state_stack_t accepts_block_stack;
|
||||
|
||||
/** A stack of lex modes. */
|
||||
struct {
|
||||
/** The current mode of the lexer. */
|
||||
pm_lex_mode_t *current;
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file prettyprint.h
|
||||
*
|
||||
* An AST node pretty-printer.
|
||||
*/
|
||||
#ifndef PRISM_PRETTYPRINT_H
|
||||
#define PRISM_PRETTYPRINT_H
|
||||
|
||||
|
|
120
prism/prism.c
120
prism/prism.c
|
@ -1,104 +1,5 @@
|
|||
#include "prism.h"
|
||||
|
||||
/**
|
||||
* @mainpage
|
||||
*
|
||||
* Prism is a parser for the Ruby programming language. It is designed to be
|
||||
* portable, error tolerant, and maintainable. It is written in C99 and has no
|
||||
* dependencies. It is currently being integrated into
|
||||
* [CRuby](https://github.com/ruby/ruby),
|
||||
* [JRuby](https://github.com/jruby/jruby),
|
||||
* [TruffleRuby](https://github.com/oracle/truffleruby),
|
||||
* [Sorbet](https://github.com/sorbet/sorbet), and
|
||||
* [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
|
||||
*
|
||||
* @section getting-started Getting started
|
||||
*
|
||||
* If you're vendoring this project and compiling it statically then as long as
|
||||
* you have a C99 compiler you will be fine. If you're linking against it as
|
||||
* a shared library, then you should compile with `-fvisibility=hidden` and
|
||||
* `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
|
||||
* visible.
|
||||
*
|
||||
* @section parsing Parsing
|
||||
*
|
||||
* In order to parse Ruby code, the structures and functions that you're going
|
||||
* to want to use and be aware of are:
|
||||
*
|
||||
* * @ref pm_parser_t - the main parser structure
|
||||
* * @ref pm_parser_init - initialize a parser
|
||||
* * @ref pm_parse - parse and return the root node
|
||||
* * @ref pm_node_destroy - deallocate the root node returned by `pm_parse`
|
||||
* * @ref pm_parser_free - free the internal memory of the parser
|
||||
*
|
||||
* Putting all of this together would look something like:
|
||||
*
|
||||
* ```c
|
||||
* void parse(const uint8_t *source, size_t length) {
|
||||
* pm_parser_t parser;
|
||||
* pm_parser_init(&parser, source, length, NULL);
|
||||
*
|
||||
* pm_node_t *root = pm_parse(&parser);
|
||||
* printf("PARSED!\n");
|
||||
*
|
||||
* pm_node_destroy(root);
|
||||
* pm_parser_free(&parser);
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* All of the nodes "inherit" from `pm_node_t` by embedding those structures as
|
||||
* their first member. This means you can downcast and upcast any node in the
|
||||
* tree to a `pm_node_t`.
|
||||
*
|
||||
* @section serializing Serializing
|
||||
*
|
||||
* Prism provides the ability to serialize the AST and its related metadata into
|
||||
* a binary format. This format is designed to be portable to different
|
||||
* languages and runtimes so that you only need to make one FFI call in order to
|
||||
* parse Ruby code. The structures and functions that you're going to want to
|
||||
* use and be aware of are:
|
||||
*
|
||||
* * @ref pm_buffer_t - a small buffer object that will hold the serialized AST
|
||||
* * @ref pm_buffer_free - free the memory associated with the buffer
|
||||
* * @ref pm_serialize - serialize the AST into a buffer
|
||||
* * @ref pm_parse_serialize - parse and serialize the AST into a buffer
|
||||
*
|
||||
* Putting all of this together would look something like:
|
||||
*
|
||||
* ```c
|
||||
* void serialize(const uint8_t *source, size_t length) {
|
||||
* pm_buffer_t buffer = { 0 };
|
||||
*
|
||||
* pm_parse_serialize(source, length, &buffer, NULL);
|
||||
* printf("SERIALIZED!\n");
|
||||
*
|
||||
* pm_buffer_free(&buffer);
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* @section inspecting Inspecting
|
||||
*
|
||||
* Prism provides the ability to inspect the AST by pretty-printing nodes. You
|
||||
* can do this with the `pm_prettyprint` function, which you would use like:
|
||||
*
|
||||
* ```c
|
||||
* void prettyprint(const uint8_t *source, size_t length) {
|
||||
* pm_parser_t parser;
|
||||
* pm_parser_init(&parser, source, length, NULL);
|
||||
*
|
||||
* pm_node_t *root = pm_parse(&parser);
|
||||
* pm_buffer_t buffer = { 0 };
|
||||
*
|
||||
* pm_prettyprint(&buffer, &parser, root);
|
||||
* printf("%.*s\n", (int) buffer.length, buffer.value);
|
||||
*
|
||||
* pm_buffer_free(&buffer);
|
||||
* pm_node_destroy(root);
|
||||
* pm_parser_free(&parser);
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
|
||||
/**
|
||||
* The prism version and the serialization format.
|
||||
*/
|
||||
|
@ -764,9 +665,16 @@ not_provided(pm_parser_t *parser) {
|
|||
* of the call node creation functions.
|
||||
*/
|
||||
typedef struct {
|
||||
/** The optional location of the opening parenthesis or bracket. */
|
||||
pm_location_t opening_loc;
|
||||
|
||||
/** The lazily-allocated optional arguments node. */
|
||||
pm_arguments_node_t *arguments;
|
||||
|
||||
/** The optional location of the closing parenthesis or bracket. */
|
||||
pm_location_t closing_loc;
|
||||
|
||||
/** The optional block attached to the call. */
|
||||
pm_node_t *block;
|
||||
} pm_arguments_t;
|
||||
|
||||
|
@ -7668,7 +7576,16 @@ parser_flush_heredoc_end(pm_parser_t *parser) {
|
|||
* automatically attach the string content to the node that it belongs to.
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* The buffer that we're using to keep track of the string content. It will
|
||||
* only be initialized if we receive an escape sequence.
|
||||
*/
|
||||
pm_buffer_t buffer;
|
||||
|
||||
/**
|
||||
* The cursor into the source string that points to how far we have
|
||||
* currently copied into the buffer.
|
||||
*/
|
||||
const uint8_t *cursor;
|
||||
} pm_token_buffer_t;
|
||||
|
||||
|
@ -9835,8 +9752,13 @@ typedef enum {
|
|||
* are combined in this way to make it easier to represent associativity.
|
||||
*/
|
||||
typedef struct {
|
||||
/** The left binding power. */
|
||||
pm_binding_power_t left;
|
||||
|
||||
/** The right binding power. */
|
||||
pm_binding_power_t right;
|
||||
|
||||
/** Whether or not this token can be used as a binary operator. */
|
||||
bool binary;
|
||||
} pm_binding_powers_t;
|
||||
|
||||
|
|
108
prism/prism.h
108
prism/prism.h
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file prism.h
|
||||
*
|
||||
* The main header file for the prism parser.
|
||||
*/
|
||||
#ifndef PRISM_H
|
||||
#define PRISM_H
|
||||
|
||||
|
@ -75,10 +80,10 @@ PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_pars
|
|||
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
|
||||
|
||||
/**
|
||||
* Parse the Ruby source associated with the given parser and return the tree.
|
||||
* Parse the source associated with the given parser and return the AST.
|
||||
*
|
||||
* @param parser The parser to use.
|
||||
* @return The AST representing the Ruby source.
|
||||
* @return The AST representing the source.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
|
||||
|
||||
|
@ -181,4 +186,103 @@ PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_
|
|||
*/
|
||||
PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
|
||||
|
||||
/**
|
||||
* @mainpage
|
||||
*
|
||||
* Prism is a parser for the Ruby programming language. It is designed to be
|
||||
* portable, error tolerant, and maintainable. It is written in C99 and has no
|
||||
* dependencies. It is currently being integrated into
|
||||
* [CRuby](https://github.com/ruby/ruby),
|
||||
* [JRuby](https://github.com/jruby/jruby),
|
||||
* [TruffleRuby](https://github.com/oracle/truffleruby),
|
||||
* [Sorbet](https://github.com/sorbet/sorbet), and
|
||||
* [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
|
||||
*
|
||||
* @section getting-started Getting started
|
||||
*
|
||||
* If you're vendoring this project and compiling it statically then as long as
|
||||
* you have a C99 compiler you will be fine. If you're linking against it as
|
||||
* a shared library, then you should compile with `-fvisibility=hidden` and
|
||||
* `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
|
||||
* visible.
|
||||
*
|
||||
* @section parsing Parsing
|
||||
*
|
||||
* In order to parse Ruby code, the structures and functions that you're going
|
||||
* to want to use and be aware of are:
|
||||
*
|
||||
* * pm_parser_t - the main parser structure
|
||||
* * pm_parser_init - initialize a parser
|
||||
* * pm_parse - parse and return the root node
|
||||
* * pm_node_destroy - deallocate the root node returned by `pm_parse`
|
||||
* * pm_parser_free - free the internal memory of the parser
|
||||
*
|
||||
* Putting all of this together would look something like:
|
||||
*
|
||||
* ```c
|
||||
* void parse(const uint8_t *source, size_t length) {
|
||||
* pm_parser_t parser;
|
||||
* pm_parser_init(&parser, source, length, NULL);
|
||||
*
|
||||
* pm_node_t *root = pm_parse(&parser);
|
||||
* printf("PARSED!\n");
|
||||
*
|
||||
* pm_node_destroy(root);
|
||||
* pm_parser_free(&parser);
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* All of the nodes "inherit" from `pm_node_t` by embedding those structures as
|
||||
* their first member. This means you can downcast and upcast any node in the
|
||||
* tree to a `pm_node_t`.
|
||||
*
|
||||
* @section serializing Serializing
|
||||
*
|
||||
* Prism provides the ability to serialize the AST and its related metadata into
|
||||
* a binary format. This format is designed to be portable to different
|
||||
* languages and runtimes so that you only need to make one FFI call in order to
|
||||
* parse Ruby code. The structures and functions that you're going to want to
|
||||
* use and be aware of are:
|
||||
*
|
||||
* * pm_buffer_t - a small buffer object that will hold the serialized AST
|
||||
* * pm_buffer_free - free the memory associated with the buffer
|
||||
* * pm_serialize - serialize the AST into a buffer
|
||||
* * pm_parse_serialize - parse and serialize the AST into a buffer
|
||||
*
|
||||
* Putting all of this together would look something like:
|
||||
*
|
||||
* ```c
|
||||
* void serialize(const uint8_t *source, size_t length) {
|
||||
* pm_buffer_t buffer = { 0 };
|
||||
*
|
||||
* pm_parse_serialize(source, length, &buffer, NULL);
|
||||
* printf("SERIALIZED!\n");
|
||||
*
|
||||
* pm_buffer_free(&buffer);
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* @section inspecting Inspecting
|
||||
*
|
||||
* Prism provides the ability to inspect the AST by pretty-printing nodes. You
|
||||
* can do this with the `pm_prettyprint` function, which you would use like:
|
||||
*
|
||||
* ```c
|
||||
* void prettyprint(const uint8_t *source, size_t length) {
|
||||
* pm_parser_t parser;
|
||||
* pm_parser_init(&parser, source, length, NULL);
|
||||
*
|
||||
* pm_node_t *root = pm_parse(&parser);
|
||||
* pm_buffer_t buffer = { 0 };
|
||||
*
|
||||
* pm_prettyprint(&buffer, &parser, root);
|
||||
* printf("%.*s\n", (int) buffer.length, buffer.value);
|
||||
*
|
||||
* pm_buffer_free(&buffer);
|
||||
* pm_node_destroy(root);
|
||||
* pm_parser_free(&parser);
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
|
|
@ -4,11 +4,22 @@
|
|||
* This is the parser that is going to handle parsing regular expressions.
|
||||
*/
|
||||
typedef struct {
|
||||
/** A pointer to the start of the source that we are parsing. */
|
||||
const uint8_t *start;
|
||||
|
||||
/** A pointer to the current position in the source. */
|
||||
const uint8_t *cursor;
|
||||
|
||||
/** A pointer to the end of the source that we are parsing. */
|
||||
const uint8_t *end;
|
||||
|
||||
/** A list of named captures that we've found. */
|
||||
pm_string_list_t *named_captures;
|
||||
|
||||
/** Whether the encoding has changed from the default. */
|
||||
bool encoding_changed;
|
||||
|
||||
/** The encoding of the source. */
|
||||
pm_encoding_t *encoding;
|
||||
} pm_regexp_parser_t;
|
||||
|
||||
|
@ -318,6 +329,7 @@ typedef enum {
|
|||
* This is the set of options that are configurable on the regular expression.
|
||||
*/
|
||||
typedef struct {
|
||||
/** The current state of each option. */
|
||||
uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
|
||||
} pm_regexp_options_t;
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file regexp.h
|
||||
*
|
||||
* A regular expression parser.
|
||||
*/
|
||||
#ifndef PRISM_REGEXP_H
|
||||
#define PRISM_REGEXP_H
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file ast.h
|
||||
*
|
||||
* The abstract syntax tree.
|
||||
*/
|
||||
#ifndef PRISM_AST_H
|
||||
#define PRISM_AST_H
|
||||
|
||||
|
@ -78,18 +83,37 @@ enum pm_node_type {
|
|||
PM_SCOPE_NODE
|
||||
};
|
||||
|
||||
/**
|
||||
* This is the type of node embedded in the node struct. We explicitly control
|
||||
* the size of it here to avoid having the variable-width enum.
|
||||
*/
|
||||
typedef uint16_t pm_node_type_t;
|
||||
|
||||
/**
|
||||
* These are the flags embedded in the node struct. We explicitly control the
|
||||
* size of it here to avoid having the variable-width enum.
|
||||
*/
|
||||
typedef uint16_t pm_node_flags_t;
|
||||
|
||||
// We store the flags enum in every node in the tree. Some flags are common to
|
||||
// all nodes (the ones listed below). Others are specific to certain node types.
|
||||
/**
|
||||
* We store the flags enum in every node in the tree. Some flags are common to
|
||||
* all nodes (the ones listed below). Others are specific to certain node types.
|
||||
*/
|
||||
#define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8)
|
||||
|
||||
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1));
|
||||
static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2));
|
||||
static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS - 1)) | (1 << (PM_NODE_FLAG_BITS - 2));
|
||||
|
||||
// For easy access, we define some macros to check node type
|
||||
#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
|
||||
/**
|
||||
* Cast the type to an enum to allow the compiler to provide exhaustiveness
|
||||
* checking.
|
||||
*/
|
||||
#define PM_NODE_TYPE(node) ((enum pm_node_type) node->type)
|
||||
|
||||
/**
|
||||
* Return true if the type of the given node matches the given type.
|
||||
*/
|
||||
#define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type))
|
||||
|
||||
/**
|
||||
|
@ -132,8 +156,11 @@ typedef struct pm_node {
|
|||
* @extends pm_node_t
|
||||
*/
|
||||
typedef struct pm_<%= node.human %> {
|
||||
/** The embedded base node. */
|
||||
pm_node_t base;
|
||||
<%- node.fields.grep_v(Prism::FlagsField).each do |field| -%>
|
||||
|
||||
/** <%= node.name %>#<%= field.name %> */
|
||||
<%= case field
|
||||
when Prism::NodeField, Prism::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
|
||||
when Prism::NodeListField then "struct pm_node_list #{field.name}"
|
||||
|
@ -162,6 +189,12 @@ typedef enum pm_<%= flag.human %> {
|
|||
} pm_<%= flag.human %>_t;
|
||||
<%- end -%>
|
||||
|
||||
/**
|
||||
* When we're serializing to Java, we want to skip serializing the location
|
||||
* fields as they won't be used by JRuby or TruffleRuby. This boolean allows us
|
||||
* to specify that through the environment. It will never be true except for in
|
||||
* those build systems.
|
||||
*/
|
||||
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS %>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_buffer.h
|
||||
*
|
||||
* A wrapper around a contiguous block of allocated memory.
|
||||
*/
|
||||
#ifndef PRISM_BUFFER_H
|
||||
#define PRISM_BUFFER_H
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_char.h
|
||||
*
|
||||
* Functions for working with characters and strings.
|
||||
*/
|
||||
#ifndef PRISM_CHAR_H
|
||||
#define PRISM_CHAR_H
|
||||
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
// The constant pool is a data structure that stores a set of strings. Each
|
||||
// string is assigned a unique id, which can be used to compare strings for
|
||||
// equality. This comparison ends up being much faster than strcmp, since it
|
||||
// only requires a single integer comparison.
|
||||
|
||||
/**
|
||||
* @file pm_constant_pool.h
|
||||
*
|
||||
* A data structure that stores a set of strings.
|
||||
*
|
||||
* Each string is assigned a unique id, which can be used to compare strings for
|
||||
* equality. This comparison ends up being much faster than strcmp, since it
|
||||
* only requires a single integer comparison.
|
||||
*/
|
||||
#ifndef PRISM_CONSTANT_POOL_H
|
||||
#define PRISM_CONSTANT_POOL_H
|
||||
|
||||
|
@ -14,12 +18,23 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
* A constant id is a unique identifier for a constant in the constant pool.
|
||||
*/
|
||||
typedef uint32_t pm_constant_id_t;
|
||||
|
||||
/**
|
||||
* A list of constant IDs. Usually used to represent a set of locals.
|
||||
*/
|
||||
typedef struct {
|
||||
pm_constant_id_t *ids;
|
||||
/** The number of constant ids in the list. */
|
||||
size_t size;
|
||||
|
||||
/** The number of constant ids that have been allocated in the list. */
|
||||
size_t capacity;
|
||||
|
||||
/** The constant ids in the list. */
|
||||
pm_constant_id_t *ids;
|
||||
} pm_constant_id_list_t;
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_list.h
|
||||
*
|
||||
* An abstract linked list.
|
||||
*/
|
||||
#ifndef PRISM_LIST_H
|
||||
#define PRISM_LIST_H
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_memchr.h
|
||||
*
|
||||
* A custom memchr implementation.
|
||||
*/
|
||||
#ifndef PRISM_MEMCHR_H
|
||||
#define PRISM_MEMCHR_H
|
||||
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
// When compiling the syntax tree, it's necessary to know the line and column
|
||||
// of many nodes. This is necessary to support things like error messages,
|
||||
// tracepoints, etc.
|
||||
//
|
||||
// It's possible that we could store the start line, start column, end line, and
|
||||
// end column on every node in addition to the offsets that we already store,
|
||||
// but that would be quite a lot of memory overhead.
|
||||
|
||||
/**
|
||||
* @file pm_newline_list.h
|
||||
*
|
||||
* A list of byte offsets of newlines in a string.
|
||||
*
|
||||
* When compiling the syntax tree, it's necessary to know the line and column
|
||||
* of many nodes. This is necessary to support things like error messages,
|
||||
* tracepoints, etc.
|
||||
*
|
||||
* It's possible that we could store the start line, start column, end line, and
|
||||
* end column on every node in addition to the offsets that we already store,
|
||||
* but that would be quite a lot of memory overhead.
|
||||
*/
|
||||
#ifndef PRISM_NEWLINE_LIST_H
|
||||
#define PRISM_NEWLINE_LIST_H
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_state_stack.h
|
||||
*
|
||||
* A stack of boolean values.
|
||||
*/
|
||||
#ifndef PRISM_STATE_STACK_H
|
||||
#define PRISM_STATE_STACK_H
|
||||
|
||||
|
@ -30,6 +35,7 @@ void pm_state_stack_pop(pm_state_stack_t *stack);
|
|||
* Returns the value at the top of the stack.
|
||||
*
|
||||
* @param stack The stack to get the value from.
|
||||
* @return The value at the top of the stack.
|
||||
*/
|
||||
bool pm_state_stack_p(pm_state_stack_t *stack);
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_string.h
|
||||
*
|
||||
* A generic string type that can have various ownership semantics.
|
||||
*/
|
||||
#ifndef PRISM_STRING_H
|
||||
#define PRISM_STRING_H
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_string_list.h
|
||||
*
|
||||
* A list of strings.
|
||||
*/
|
||||
#ifndef PRISM_STRING_LIST_H
|
||||
#define PRISM_STRING_LIST_H
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_strncasecmp.h
|
||||
*
|
||||
* A custom strncasecmp implementation.
|
||||
*/
|
||||
#ifndef PRISM_STRNCASECMP_H
|
||||
#define PRISM_STRNCASECMP_H
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
/**
|
||||
* @file pm_strpbrk.h
|
||||
*
|
||||
* A custom strpbrk implementation.
|
||||
*/
|
||||
#ifndef PRISM_STRPBRK_H
|
||||
#define PRISM_STRPBRK_H
|
||||
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
/**
|
||||
* @file version.h
|
||||
*
|
||||
* The version of the Prism library.
|
||||
*/
|
||||
#ifndef PRISM_VERSION_H
|
||||
#define PRISM_VERSION_H
|
||||
|
||||
/**
|
||||
* The major version of the Prism library as an int.
|
||||
*/
|
||||
|
@ -17,3 +25,5 @@
|
|||
* The version of the Prism library as a constant string.
|
||||
*/
|
||||
#define PRISM_VERSION "0.16.0"
|
||||
|
||||
#endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче