This commit is contained in:
Kevin Newton 2023-09-27 12:24:48 -04:00
Родитель 8ab56869a6
Коммит 4f73a7c2f7
99 изменённых файлов: 7870 добавлений и 7877 удалений

Просмотреть файл

@ -1,25 +1,25 @@
# frozen_string_literal: true
module YARP
# There are many files in YARP that are templated to handle every node type,
module Prism
# There are many files in prism that are templated to handle every node type,
# which means the files can end up being quite large. We autoload them to make
# our require speed faster since consuming libraries are unlikely to use all
# of these features.
autoload :BasicVisitor, "yarp/visitor"
autoload :Compiler, "yarp/compiler"
autoload :Debug, "yarp/debug"
autoload :DesugarCompiler, "yarp/desugar_compiler"
autoload :Dispatcher, "yarp/dispatcher"
autoload :DSL, "yarp/dsl"
autoload :LexCompat, "yarp/lex_compat"
autoload :LexRipper, "yarp/lex_compat"
autoload :MutationCompiler, "yarp/mutation_compiler"
autoload :NodeInspector, "yarp/node_inspector"
autoload :RipperCompat, "yarp/ripper_compat"
autoload :Pack, "yarp/pack"
autoload :Pattern, "yarp/pattern"
autoload :Serialize, "yarp/serialize"
autoload :Visitor, "yarp/visitor"
autoload :BasicVisitor, "prism/visitor"
autoload :Compiler, "prism/compiler"
autoload :Debug, "prism/debug"
autoload :DesugarCompiler, "prism/desugar_compiler"
autoload :Dispatcher, "prism/dispatcher"
autoload :DSL, "prism/dsl"
autoload :LexCompat, "prism/lex_compat"
autoload :LexRipper, "prism/lex_compat"
autoload :MutationCompiler, "prism/mutation_compiler"
autoload :NodeInspector, "prism/node_inspector"
autoload :RipperCompat, "prism/ripper_compat"
autoload :Pack, "prism/pack"
autoload :Pattern, "prism/pattern"
autoload :Serialize, "prism/serialize"
autoload :Visitor, "prism/visitor"
# Some of these constants are not meant to be exposed, so marking them as
# private here.
@ -47,18 +47,18 @@ module YARP
end
end
require_relative "yarp/node"
require_relative "yarp/node_ext"
require_relative "yarp/parse_result"
require_relative "yarp/parse_result/comments"
require_relative "yarp/parse_result/newlines"
require_relative "prism/node"
require_relative "prism/node_ext"
require_relative "prism/parse_result"
require_relative "prism/parse_result/comments"
require_relative "prism/parse_result/newlines"
# This is a Ruby implementation of the YARP parser. If we're running on CRuby
# and we haven't explicitly set the YARP_FFI_BACKEND environment variable, then
# This is a Ruby implementation of the prism parser. If we're running on CRuby
# and we haven't explicitly set the PRISM_FFI_BACKEND environment variable, then
# it's going to require the built library. Otherwise, it's going to require a
# module that uses FFI to call into the library.
# NOTE(review): the YARP_FFI_BACKEND / "yarp/..." lines are the pre-rename
# forms of the PRISM_FFI_BACKEND / "prism/..." lines that follow them.
#
# Backend selection: the built library is required only when RUBY_ENGINE is
# "ruby" and the FFI backend environment variable is unset; every other
# combination falls through to the FFI-based module.
if RUBY_ENGINE == "ruby" and !ENV["YARP_FFI_BACKEND"]
require "yarp/yarp"
if RUBY_ENGINE == "ruby" and !ENV["PRISM_FFI_BACKEND"]
# Plain `require` (not `require_relative`), so this is resolved through the
# load path rather than relative to this file.
require "prism/prism"
else
require_relative "yarp/ffi"
require_relative "prism/ffi"
end

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
module YARP
module Prism
# This module is used for testing and debugging and is not meant to be used by
# consumers of this library.
module Debug
@ -77,11 +77,11 @@ module YARP
end
end
# For the given source, parses with YARP and returns a list of all of the
# For the given source, parses with prism and returns a list of all of the
# sets of local variables that were encountered.
def self.yarp_locals(source)
def self.prism_locals(source)
locals = []
stack = [YARP.parse(source).value]
stack = [Prism.parse(source).value]
while (node = stack.pop)
case node
@ -91,7 +91,7 @@ module YARP
params = node.parameters
params = params&.parameters unless node.is_a?(DefNode)
# YARP places parameters in the same order that they appear in the
# prism places parameters in the same order that they appear in the
# source. CRuby places them in the order that they need to appear
# according to their own internal calling convention. We mimic that
# order here so that we can compare properly.
@ -147,7 +147,7 @@ module YARP
end
# Parses the given source and returns the `offsets` held by the source object
# attached to the parse result.
#
# NOTE(review): the YARP.parse line is the pre-rename form of the Prism.parse
# line below it.
def self.newlines(source)
YARP.parse(source).source.offsets
Prism.parse(source).source.offsets
end
def self.parse_serialize_file(filepath)

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
module YARP
module Prism
# DesugarCompiler is a compiler that desugars Ruby code into a more primitive
# form. This is useful for consumers that want to deal with fewer node types.
class DesugarCompiler < MutationCompiler

Просмотреть файл

@ -6,7 +6,7 @@
require "rbconfig"
require "ffi"
module YARP
module Prism
BACKEND = :FFI
module LibRubyParser
@ -35,13 +35,13 @@ module YARP
def self.load_exported_functions_from(header, *functions)
File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
# We only want to attempt to load exported functions.
next unless line.start_with?("YP_EXPORTED_FUNCTION ")
next unless line.start_with?("PRISM_EXPORTED_FUNCTION ")
# We only want to load the functions that we are interested in.
next unless functions.any? { |function| line.include?(function) }
# Parse the function declaration.
unless /^YP_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
raise "Could not parse #{line}"
end
@ -67,35 +67,35 @@ module YARP
end
# NOTE(review): in each call below the "yarp"/"yp_" arguments are the
# pre-rename forms of the "prism"/"pm_" arguments that follow them.
#
# Top-level header: the version function plus the parse/lex serialization
# entry points.
load_exported_functions_from(
"yarp.h",
"yp_version",
"yp_parse_serialize",
"yp_lex_serialize",
"yp_parse_lex_serialize"
"prism.h",
"pm_version",
"pm_parse_serialize",
"pm_lex_serialize",
"pm_parse_lex_serialize"
)
# Buffer header: sizeof/init/value/length/free, i.e. everything needed to
# allocate a buffer, read it back and release it.
load_exported_functions_from(
"yarp/util/yp_buffer.h",
"yp_buffer_sizeof",
"yp_buffer_init",
"yp_buffer_value",
"yp_buffer_length",
"yp_buffer_free"
"prism/util/pm_buffer.h",
"pm_buffer_sizeof",
"pm_buffer_init",
"pm_buffer_value",
"pm_buffer_length",
"pm_buffer_free"
)
# String header: mapped init, source/length accessors, sizeof and free.
load_exported_functions_from(
"yarp/util/yp_string.h",
"yp_string_mapped_init",
"yp_string_free",
"yp_string_source",
"yp_string_length",
"yp_string_sizeof"
"prism/util/pm_string.h",
"pm_string_mapped_init",
"pm_string_free",
"pm_string_source",
"pm_string_length",
"pm_string_sizeof"
)
# This object represents a yp_buffer_t. We only use it as an opaque pointer,
# so it doesn't need to know the fields of yp_buffer_t.
class YPBuffer
SIZEOF = LibRubyParser.yp_buffer_sizeof
# This object represents a pm_buffer_t. We only use it as an opaque pointer,
# so it doesn't need to know the fields of pm_buffer_t.
class PrismBuffer
SIZEOF = LibRubyParser.pm_buffer_sizeof
attr_reader :pointer
@ -104,11 +104,11 @@ module YARP
end
# Returns whatever the library's buffer_value function reports for the wrapped
# pointer.
# NOTE(review): the yp_ call is the pre-rename form of the pm_ call below it.
def value
LibRubyParser.yp_buffer_value(pointer)
LibRubyParser.pm_buffer_value(pointer)
end
# Returns whatever the library's buffer_length function reports for the
# wrapped pointer.
# NOTE(review): the yp_ call is the pre-rename form of the pm_ call below it.
def length
LibRubyParser.yp_buffer_length(pointer)
LibRubyParser.pm_buffer_length(pointer)
end
def read
@ -121,19 +121,19 @@ module YARP
pointer = FFI::MemoryPointer.new(SIZEOF)
begin
raise unless LibRubyParser.yp_buffer_init(pointer)
raise unless LibRubyParser.pm_buffer_init(pointer)
yield new(pointer)
ensure
LibRubyParser.yp_buffer_free(pointer)
LibRubyParser.pm_buffer_free(pointer)
pointer.free
end
end
end
# This object represents a yp_string_t. We only use it as an opaque pointer,
# This object represents a pm_string_t. We only use it as an opaque pointer,
# so it doesn't have to be an FFI::Struct.
class YPString
SIZEOF = LibRubyParser.yp_string_sizeof
class PrismString
SIZEOF = LibRubyParser.pm_string_sizeof
attr_reader :pointer
@ -142,93 +142,93 @@ module YARP
end
# Returns the result of the library's string_source function for the wrapped
# pointer; #read below calls read_string on this value.
# NOTE(review): the yp_ call is the pre-rename form of the pm_ call below it.
def source
LibRubyParser.yp_string_source(pointer)
LibRubyParser.pm_string_source(pointer)
end
# Returns the result of the library's string_length function for the wrapped
# pointer; #read uses it as the number of bytes to read.
# NOTE(review): the yp_ call is the pre-rename form of the pm_ call below it.
def length
LibRubyParser.yp_string_length(pointer)
LibRubyParser.pm_string_length(pointer)
end
# Reads `length` bytes starting at `source` and returns them as a Ruby string.
def read
source.read_string(length)
end
# Yields a yp_string_t pointer to the given block.
# Yields a pm_string_t pointer to the given block.
# Allocates SIZEOF bytes, initializes them as a mapped string for `filepath`,
# and yields a wrapper around the pointer. The value of the block is the value
# of this method.
#
# NOTE(review): `&block` is declared but the body uses `yield`.
# NOTE(review): the yp_ calls are the pre-rename forms of the pm_ calls below
# them.
def self.with(filepath, &block)
pointer = FFI::MemoryPointer.new(SIZEOF)
begin
# A bare `raise` with no active exception raises a RuntimeError, so a falsy
# result from the init call surfaces as a RuntimeError with no message.
raise unless LibRubyParser.yp_string_mapped_init(pointer, filepath)
raise unless LibRubyParser.pm_string_mapped_init(pointer, filepath)
yield new(pointer)
ensure
# Runs on every exit path, including when the init call above failed or the
# block raised.
LibRubyParser.yp_string_free(pointer)
LibRubyParser.pm_string_free(pointer)
pointer.free
end
end
end
# Serializes a parse of `source` (of `source_size` bytes) into a temporary
# buffer and returns what `buffer.read` yields.
#
# source      - the source passed straight through to the serialize call.
# source_size - the length passed alongside it.
# filepath    - optional path; when nil, `metadata` stays nil.
#
# NOTE(review): the YPBuffer / yp_ lines are the pre-rename forms of the
# PrismBuffer / pm_ lines below them.
def self.dump_internal(source, source_size, filepath)
YPBuffer.with do |buffer|
PrismBuffer.with do |buffer|
# Packed as: filepath byte size ("L"), the filepath bytes ("A*"), then a
# trailing 0 ("L").
metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
yp_parse_serialize(source, source_size, buffer.pointer, metadata)
pm_parse_serialize(source, source_size, buffer.pointer, metadata)
buffer.read
end
end
end
# Mark the LibRubyParser module as private as it should only be called through
# the YARP module.
# the prism module.
private_constant :LibRubyParser
# The version constant is set by reading the result of calling yp_version.
VERSION = LibRubyParser.yp_version.read_string
# The version constant is set by reading the result of calling pm_version.
VERSION = LibRubyParser.pm_version.read_string
# Mirror the YARP.dump API by using the serialization API.
# Mirror the Prism.dump API by using the serialization API.
# code     - source string; its bytesize is passed as the length.
# filepath - optional path forwarded to dump_internal (nil by default).
def self.dump(code, filepath = nil)
LibRubyParser.dump_internal(code, code.bytesize, filepath)
end
# Mirror the YARP.dump_file API by using the serialization API.
# Mirror the Prism.dump_file API by using the serialization API.
# Loads `filepath` through the native string wrapper and hands its source
# pointer and length directly to dump_internal, without first copying the
# contents into a Ruby string.
# NOTE(review): the YPString line is the pre-rename form of the PrismString
# line below it.
def self.dump_file(filepath)
LibRubyParser::YPString.with(filepath) do |string|
LibRubyParser::PrismString.with(filepath) do |string|
LibRubyParser.dump_internal(string.source, string.length, filepath)
end
end
# Mirror the YARP.lex API by using the serialization API.
# Mirror the Prism.lex API by using the serialization API.
# Lexes `code` through the serialization API and rebuilds the tokens with
# Serialize.load_tokens.
#
# code     - source string; its bytesize is passed as the length.
# filepath - optional path (nil by default).
#
# NOTE(review): the YPBuffer / yp_ lines are the pre-rename forms of the
# PrismBuffer / pm_ lines below them.
def self.lex(code, filepath = nil)
LibRubyParser::YPBuffer.with do |buffer|
LibRubyParser.yp_lex_serialize(code, code.bytesize, filepath, buffer.pointer)
LibRubyParser::PrismBuffer.with do |buffer|
# Unlike the parse serialize calls, this one takes the raw filepath (not
# packed metadata) and takes it before the buffer pointer.
LibRubyParser.pm_lex_serialize(code, code.bytesize, filepath, buffer.pointer)
Serialize.load_tokens(Source.new(code), buffer.read)
end
end
# Mirror the YARP.lex_file API by using the serialization API.
# Mirror the Prism.lex_file API by using the serialization API.
# Loads `filepath` through the native string wrapper, copies its contents into
# a Ruby string with `string.read`, and delegates to lex.
# NOTE(review): the YPString line is the pre-rename form of the PrismString
# line below it.
def self.lex_file(filepath)
LibRubyParser::YPString.with(filepath) do |string|
LibRubyParser::PrismString.with(filepath) do |string|
lex(string.read, filepath)
end
end
# Mirror the YARP.parse API by using the serialization API.
# Mirror the Prism.parse API by using the serialization API.
# Dumps `code` to its serialized form and immediately loads that back,
# returning whatever `load` returns.
#
# code     - source string.
# filepath - optional path forwarded to dump (nil by default).
#
# NOTE(review): the YARP.load line is the pre-rename form of the Prism.load
# line below it.
def self.parse(code, filepath = nil)
YARP.load(code, dump(code, filepath))
Prism.load(code, dump(code, filepath))
end
# Mirror the YARP.parse_file API by using the serialization API. This uses
# Mirror the Prism.parse_file API by using the serialization API. This uses
# native strings instead of Ruby strings because it allows us to use mmap when
# it is available.
# Loads `filepath` through the native string wrapper, copies its contents into
# a Ruby string with `string.read`, and delegates to parse.
# NOTE(review): the YPString line is the pre-rename form of the PrismString
# line below it.
def self.parse_file(filepath)
LibRubyParser::YPString.with(filepath) do |string|
LibRubyParser::PrismString.with(filepath) do |string|
parse(string.read, filepath)
end
end
# Mirror the YARP.parse_lex API by using the serialization API.
# Mirror the Prism.parse_lex API by using the serialization API.
def self.parse_lex(code, filepath = nil)
LibRubyParser::YPBuffer.with do |buffer|
LibRubyParser::PrismBuffer.with do |buffer|
metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
LibRubyParser.yp_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata)
LibRubyParser.pm_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata)
source = Source.new(code)
loader = Serialize::Loader.new(source, buffer.read)
@ -242,9 +242,9 @@ module YARP
end
end
# Mirror the YARP.parse_lex_file API by using the serialization API.
# Mirror the Prism.parse_lex_file API by using the serialization API.
# Loads `filepath` through the native string wrapper, copies its contents into
# a Ruby string with `string.read`, and delegates to parse_lex.
# NOTE(review): the YPString line is the pre-rename form of the PrismString
# line below it.
def self.parse_lex_file(filepath)
LibRubyParser::YPString.with(filepath) do |string|
LibRubyParser::PrismString.with(filepath) do |string|
parse_lex(string.read, filepath)
end
end

Просмотреть файл

@ -2,14 +2,14 @@
require "delegate"
module YARP
# This class is responsible for lexing the source using YARP and then
module Prism
# This class is responsible for lexing the source using prism and then
# converting those tokens to be compatible with Ripper. In the vast majority
# of cases, this is a one-to-one mapping of the token type. Everything else
# generally lines up. However, there are a few cases that require special
# handling.
class LexCompat
# This is a mapping of YARP token types to Ripper token types. This is a
# This is a mapping of prism token types to Ripper token types. This is a
# many-to-one mapping because we split up our token types, whereas Ripper
# tends to group them.
RIPPER = {
@ -339,8 +339,8 @@ module YARP
# Heredocs that are dedenting heredocs are a little more complicated.
# Ripper outputs on_ignored_sp tokens for the whitespace that is being
# removed from the output. YARP only modifies the node itself and keeps
# the token the same. This simplifies YARP, but makes comparing against
# removed from the output. prism only modifies the node itself and keeps
# the token the same. This simplifies prism, but makes comparing against
# Ripper much harder because there is a length mismatch.
#
# Fortunately, we already have to pull out the heredoc tokens in order to
@ -563,7 +563,7 @@ module YARP
state = :default
heredoc_stack = [[]]
result = YARP.lex(source, @filepath)
result = Prism.lex(source, @filepath)
result_value = result.value
previous_state = nil
@ -650,7 +650,7 @@ module YARP
IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
when :on_regexp_end
# On regex end, Ripper scans and then sets end state, so the ripper
# lexed output is begin, when it should be end. YARP sets lex state
# lexed output is begin, when it should be end. prism sets lex state
# correctly to end state, but we want to be able to compare against
# Ripper's lexed state. So here, if it's a regexp end token, we
# output the state as the previous state, solely for the sake of
@ -706,7 +706,7 @@ module YARP
# The order in which tokens appear in our lexer is different from the
# order that they appear in Ripper. When we hit the declaration of a
# heredoc in YARP, we skip forward and lex the rest of the content of
# heredoc in prism, we skip forward and lex the rest of the content of
# the heredoc before going back and lexing at the end of the heredoc
# identifier.
#

Просмотреть файл

@ -1,8 +1,8 @@
# frozen_string_literal: true
# Here we are reopening the YARP module to provide methods on nodes that aren't
# Here we are reopening the prism module to provide methods on nodes that aren't
# templated and are meant as convenience methods.
module YARP
module Prism
class FloatNode < Node
# Returns the value of the node as a Ruby Float.
def value

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
module YARP
module Prism
# This object is responsible for generating the output for the inspect method
# implementations of child nodes.
class NodeInspector

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
module YARP
module Prism
module Pack
%i[
SPACE

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
module YARP
module Prism
# This represents a source of Ruby code that has been parsed. It is used in
# conjunction with locations to allow them to resolve line numbers and source
# ranges.
@ -71,7 +71,7 @@ module YARP
# Returns a string representation of this location.
# Debug representation showing the start offset, the length and the computed
# start line (`start_line` is a method call, not an instance variable).
# NOTE(review): the "#<YARP::..." string is the pre-rename form of the
# "#<Prism::..." string below it.
def inspect
"#<YARP::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
"#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
end
# The source code that this location represents.
@ -162,7 +162,7 @@ module YARP
end
# Debug representation showing the comment's type and location, each rendered
# through its own #inspect.
# NOTE(review): the "#<YARP::..." string is the pre-rename form of the
# "#<Prism::..." string below it.
def inspect
"#<YARP::Comment @type=#{@type.inspect} @location=#{@location.inspect}>"
"#<Prism::Comment @type=#{@type.inspect} @location=#{@location.inspect}>"
end
end
@ -180,7 +180,7 @@ module YARP
end
# Debug representation showing the error's message and location, each rendered
# through its own #inspect.
# NOTE(review): the "#<YARP::..." string is the pre-rename form of the
# "#<Prism::..." string below it.
def inspect
"#<YARP::ParseError @message=#{@message.inspect} @location=#{@location.inspect}>"
"#<Prism::ParseError @message=#{@message.inspect} @location=#{@location.inspect}>"
end
end
@ -198,7 +198,7 @@ module YARP
end
# Debug representation showing the warning's message and location, each
# rendered through its own #inspect.
# NOTE(review): the "#<YARP::..." string is the pre-rename form of the
# "#<Prism::..." string below it.
def inspect
"#<YARP::ParseWarning @message=#{@message.inspect} @location=#{@location.inspect}>"
"#<Prism::ParseWarning @message=#{@message.inspect} @location=#{@location.inspect}>"
end
end

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
module YARP
module Prism
class ParseResult
# When we've parsed the source, we have both the syntax tree and the list of
# comments that we found in the source. This class is responsible for

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
module YARP
module Prism
class ParseResult
# The :line tracepoint event gets fired whenever the Ruby VM encounters an
# expression on a new line. The types of expressions that can trigger this

Просмотреть файл

@ -1,24 +1,24 @@
# frozen_string_literal: true
module YARP
module Prism
# A pattern is an object that wraps a Ruby pattern matching expression. The
# expression would normally be passed to an `in` clause within a `case`
# expression or a rightward assignment expression. For example, in the
# following snippet:
#
# case node
# in ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]]
# in ConstantPathNode[ConstantReadNode[name: :Prism], ConstantReadNode[name: :Pattern]]
# end
#
# the pattern is the `ConstantPathNode[...]` expression.
#
# The pattern gets compiled into an object that responds to #call by running
# the #compile method. This method itself will run back through YARP to
# the #compile method. This method itself will run back through Prism to
# parse the expression into a tree, then walk the tree to generate the
# necessary callable objects. For example, if you wanted to compile the
# expression above into a callable, you would:
#
# callable = YARP::Pattern.new("ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]]").compile
# callable = Prism::Pattern.new("ConstantPathNode[ConstantReadNode[name: :Prism], ConstantReadNode[name: :Pattern]]").compile
# callable.call(node)
#
# The callable object returned by #compile is guaranteed to respond to #call
@ -32,7 +32,7 @@ module YARP
#
# If the query given to the initializer cannot be compiled into a valid
# matcher (either because of a syntax error or because it is using syntax we
# do not yet support) then a YARP::Pattern::CompilationError will be
# do not yet support) then a Prism::Pattern::CompilationError will be
# raised.
class Pattern
# Raised when the query given to a pattern is either invalid Ruby syntax or
@ -40,15 +40,15 @@ module YARP
# Error whose message is built from a fixed explanatory template with `repr`
# (a description of the offending node) interpolated into it.
#
# NOTE(review): inside the heredoc, each YARP/yarp line is the pre-rename form
# of the prism line that follows it.
# NOTE(review): "It failed on to understand" in the message reads like a typo
# for "It failed to understand"; left untouched here because it is runtime
# text.
class CompilationError < StandardError
# repr - text describing the node that could not be compiled.
def initialize(repr)
super(<<~ERROR)
YARP was unable to compile the pattern you provided into a usable
prism was unable to compile the pattern you provided into a usable
expression. It failed on to understand the node represented by:
#{repr}
Note that not all syntax supported by Ruby's pattern matching syntax
is also supported by YARP's patterns. If you're using some syntax
is also supported by prism's patterns. If you're using some syntax
that you believe should be supported, please open an issue on
GitHub at https://github.com/ruby/yarp/issues/new.
GitHub at https://github.com/ruby/prism/issues/new.
ERROR
end
end
@ -61,7 +61,7 @@ module YARP
end
# Compiles `query` by wrapping it in a throwaway `case nil / in ... / end`
# expression, parsing that, and handing the resulting pattern node to
# compile_node.
# NOTE(review): the YARP.parse line is the pre-rename form of the Prism.parse
# line below it.
def compile
result = YARP.parse("case nil\nin #{query}\nend")
result = Prism.parse("case nil\nin #{query}\nend")
# Last statement -> its last condition (the `in` clause) -> that clause's
# pattern.
compile_node(result.value.statements.body.last.conditions.last.pattern)
end
@ -126,11 +126,11 @@ module YARP
combine_or(compile_node(node.left), compile_node(node.right))
end
# in YARP::ConstantReadNode
# in Prism::ConstantReadNode
def compile_constant_path_node(node)
parent = node.parent
if parent.is_a?(ConstantReadNode) && parent.slice == "YARP"
if parent.is_a?(ConstantReadNode) && parent.slice == "Prism"
compile_node(node.child)
else
compile_error(node)
@ -142,8 +142,8 @@ module YARP
def compile_constant_read_node(node)
value = node.slice
if YARP.const_defined?(value, false)
clazz = YARP.const_get(value)
if Prism.const_defined?(value, false)
clazz = Prism.const_get(value)
->(other) { clazz === other }
elsif Object.const_defined?(value, false)

Просмотреть файл

@ -1,13 +1,13 @@
# frozen_string_literal: true
Gem::Specification.new do |spec|
spec.name = "yarp"
spec.name = "prism"
spec.version = "0.12.0"
spec.authors = ["Shopify"]
spec.email = ["ruby@shopify.com"]
spec.summary = "Yet Another Ruby Parser"
spec.homepage = "https://github.com/ruby/yarp"
spec.summary = "Prism Ruby parser"
spec.homepage = "https://github.com/ruby/prism"
spec.license = "MIT"
spec.required_ruby_version = ">= 3.0.0"
@ -33,59 +33,59 @@ Gem::Specification.new do |spec|
"docs/ruby_api.md",
"docs/serialization.md",
"docs/testing.md",
"ext/yarp/api_node.c",
"ext/yarp/api_pack.c",
"ext/yarp/extension.c",
"ext/yarp/extension.h",
"include/yarp.h",
"include/yarp/ast.h",
"include/yarp/defines.h",
"include/yarp/diagnostic.h",
"include/yarp/enc/yp_encoding.h",
"include/yarp/node.h",
"include/yarp/pack.h",
"include/yarp/parser.h",
"include/yarp/regexp.h",
"include/yarp/unescape.h",
"include/yarp/util/yp_buffer.h",
"include/yarp/util/yp_char.h",
"include/yarp/util/yp_constant_pool.h",
"include/yarp/util/yp_list.h",
"include/yarp/util/yp_memchr.h",
"include/yarp/util/yp_newline_list.h",
"include/yarp/util/yp_state_stack.h",
"include/yarp/util/yp_string.h",
"include/yarp/util/yp_string_list.h",
"include/yarp/util/yp_strpbrk.h",
"include/yarp/version.h",
"lib/yarp.rb",
"lib/yarp/compiler.rb",
"lib/yarp/debug.rb",
"lib/yarp/desugar_compiler.rb",
"lib/yarp/dispatcher.rb",
"lib/yarp/dsl.rb",
"lib/yarp/ffi.rb",
"lib/yarp/lex_compat.rb",
"lib/yarp/mutation_compiler.rb",
"lib/yarp/node.rb",
"lib/yarp/node_ext.rb",
"lib/yarp/node_inspector.rb",
"lib/yarp/pack.rb",
"lib/yarp/parse_result.rb",
"lib/yarp/pattern.rb",
"lib/yarp/ripper_compat.rb",
"lib/yarp/serialize.rb",
"lib/yarp/parse_result/comments.rb",
"lib/yarp/parse_result/newlines.rb",
"lib/yarp/visitor.rb",
"ext/prism/api_node.c",
"ext/prism/api_pack.c",
"ext/prism/extension.c",
"ext/prism/extension.h",
"include/prism.h",
"include/prism/ast.h",
"include/prism/defines.h",
"include/prism/diagnostic.h",
"include/prism/enc/pm_encoding.h",
"include/prism/node.h",
"include/prism/pack.h",
"include/prism/parser.h",
"include/prism/regexp.h",
"include/prism/unescape.h",
"include/prism/util/pm_buffer.h",
"include/prism/util/pm_char.h",
"include/prism/util/pm_constant_pool.h",
"include/prism/util/pm_list.h",
"include/prism/util/pm_memchr.h",
"include/prism/util/pm_newline_list.h",
"include/prism/util/pm_state_stack.h",
"include/prism/util/pm_string.h",
"include/prism/util/pm_string_list.h",
"include/prism/util/pm_strpbrk.h",
"include/prism/version.h",
"lib/prism.rb",
"lib/prism/compiler.rb",
"lib/prism/debug.rb",
"lib/prism/desugar_compiler.rb",
"lib/prism/dispatcher.rb",
"lib/prism/dsl.rb",
"lib/prism/ffi.rb",
"lib/prism/lex_compat.rb",
"lib/prism/mutation_compiler.rb",
"lib/prism/node.rb",
"lib/prism/node_ext.rb",
"lib/prism/node_inspector.rb",
"lib/prism/pack.rb",
"lib/prism/parse_result.rb",
"lib/prism/pattern.rb",
"lib/prism/ripper_compat.rb",
"lib/prism/serialize.rb",
"lib/prism/parse_result/comments.rb",
"lib/prism/parse_result/newlines.rb",
"lib/prism/visitor.rb",
"src/diagnostic.c",
"src/enc/yp_big5.c",
"src/enc/yp_euc_jp.c",
"src/enc/yp_gbk.c",
"src/enc/yp_shift_jis.c",
"src/enc/yp_tables.c",
"src/enc/yp_unicode.c",
"src/enc/yp_windows_31j.c",
"src/enc/pm_big5.c",
"src/enc/pm_euc_jp.c",
"src/enc/pm_gbk.c",
"src/enc/pm_shift_jis.c",
"src/enc/pm_tables.c",
"src/enc/pm_unicode.c",
"src/enc/pm_windows_31j.c",
"src/node.c",
"src/pack.c",
"src/prettyprint.c",
@ -93,21 +93,21 @@ Gem::Specification.new do |spec|
"src/serialize.c",
"src/token_type.c",
"src/unescape.c",
"src/util/yp_buffer.c",
"src/util/yp_char.c",
"src/util/yp_constant_pool.c",
"src/util/yp_list.c",
"src/util/yp_memchr.c",
"src/util/yp_newline_list.c",
"src/util/yp_state_stack.c",
"src/util/yp_string.c",
"src/util/yp_string_list.c",
"src/util/yp_strncasecmp.c",
"src/util/yp_strpbrk.c",
"src/yarp.c",
"yarp.gemspec",
"src/util/pm_buffer.c",
"src/util/pm_char.c",
"src/util/pm_constant_pool.c",
"src/util/pm_list.c",
"src/util/pm_memchr.c",
"src/util/pm_newline_list.c",
"src/util/pm_state_stack.c",
"src/util/pm_string.c",
"src/util/pm_string_list.c",
"src/util/pm_strncasecmp.c",
"src/util/pm_strpbrk.c",
"src/prism.c",
"prism.gemspec",
]
spec.extensions = ["ext/yarp/extconf.rb"]
spec.extensions = ["ext/prism/extconf.rb"]
spec.metadata["allowed_push_host"] = "https://rubygems.org"
end

Просмотреть файл

@ -2,14 +2,14 @@
require "ripper"
module YARP
# This class is meant to provide a compatibility layer between YARP and
module Prism
# This class is meant to provide a compatibility layer between prism and
# Ripper. It functions by parsing the entire tree first and then walking it
# and executing each of the Ripper callbacks as it goes.
#
# This class is going to necessarily be slower than the native Ripper API. It
# is meant as a stopgap until developers migrate to using YARP. It is also
# meant as a test harness for the YARP parser.
# is meant as a stopgap until developers migrate to using prism. It is also
# meant as a test harness for the prism parser.
class RipperCompat
# This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
# returns the arrays of [type, *children].
@ -156,7 +156,7 @@ module YARP
end
# Memoized parse of `source`: parsed on first call and cached in @result.
# Because `||=` is used, a nil/false result would be re-parsed on each call.
# NOTE(review): the YARP.parse line is the pre-rename form of the Prism.parse
# line below it.
def result
@result ||= YARP.parse(source)
@result ||= Prism.parse(source)
end
# Zero-argument method with an empty body; always returns nil.
def _dispatch0; end

Просмотреть файл

@ -1,9 +1,9 @@
#include "yarp/extension.h"
#include "prism/extension.h"
static VALUE rb_cYARP;
static VALUE rb_cYARPPack;
static VALUE rb_cYARPPackDirective;
static VALUE rb_cYARPPackFormat;
static VALUE rb_cPrism;
static VALUE rb_cPrismPack;
static VALUE rb_cPrismPackDirective;
static VALUE rb_cPrismPackFormat;
static VALUE v3_2_0_symbol;
static VALUE pack_symbol;
@ -20,49 +20,49 @@ static VALUE unpack_symbol;
#endif
static VALUE
pack_type_to_symbol(yp_pack_type type) {
pack_type_to_symbol(pm_pack_type type) {
switch (type) {
case YP_PACK_SPACE:
case PM_PACK_SPACE:
return ID2SYM(rb_intern("SPACE"));
case YP_PACK_COMMENT:
case PM_PACK_COMMENT:
return ID2SYM(rb_intern("COMMENT"));
case YP_PACK_INTEGER:
case PM_PACK_INTEGER:
return ID2SYM(rb_intern("INTEGER"));
case YP_PACK_UTF8:
case PM_PACK_UTF8:
return ID2SYM(rb_intern("UTF8"));
case YP_PACK_BER:
case PM_PACK_BER:
return ID2SYM(rb_intern("BER"));
case YP_PACK_FLOAT:
case PM_PACK_FLOAT:
return ID2SYM(rb_intern("FLOAT"));
case YP_PACK_STRING_SPACE_PADDED:
case PM_PACK_STRING_SPACE_PADDED:
return ID2SYM(rb_intern("STRING_SPACE_PADDED"));
case YP_PACK_STRING_NULL_PADDED:
case PM_PACK_STRING_NULL_PADDED:
return ID2SYM(rb_intern("STRING_NULL_PADDED"));
case YP_PACK_STRING_NULL_TERMINATED:
case PM_PACK_STRING_NULL_TERMINATED:
return ID2SYM(rb_intern("STRING_NULL_TERMINATED"));
case YP_PACK_STRING_MSB:
case PM_PACK_STRING_MSB:
return ID2SYM(rb_intern("STRING_MSB"));
case YP_PACK_STRING_LSB:
case PM_PACK_STRING_LSB:
return ID2SYM(rb_intern("STRING_LSB"));
case YP_PACK_STRING_HEX_HIGH:
case PM_PACK_STRING_HEX_HIGH:
return ID2SYM(rb_intern("STRING_HEX_HIGH"));
case YP_PACK_STRING_HEX_LOW:
case PM_PACK_STRING_HEX_LOW:
return ID2SYM(rb_intern("STRING_HEX_LOW"));
case YP_PACK_STRING_UU:
case PM_PACK_STRING_UU:
return ID2SYM(rb_intern("STRING_UU"));
case YP_PACK_STRING_MIME:
case PM_PACK_STRING_MIME:
return ID2SYM(rb_intern("STRING_MIME"));
case YP_PACK_STRING_BASE64:
case PM_PACK_STRING_BASE64:
return ID2SYM(rb_intern("STRING_BASE64"));
case YP_PACK_STRING_FIXED:
case PM_PACK_STRING_FIXED:
return ID2SYM(rb_intern("STRING_FIXED"));
case YP_PACK_STRING_POINTER:
case PM_PACK_STRING_POINTER:
return ID2SYM(rb_intern("STRING_POINTER"));
case YP_PACK_MOVE:
case PM_PACK_MOVE:
return ID2SYM(rb_intern("MOVE"));
case YP_PACK_BACK:
case PM_PACK_BACK:
return ID2SYM(rb_intern("BACK"));
case YP_PACK_NULL:
case PM_PACK_NULL:
return ID2SYM(rb_intern("NULL"));
default:
return Qnil;
@ -70,13 +70,13 @@ pack_type_to_symbol(yp_pack_type type) {
}
static VALUE
pack_signed_to_symbol(yp_pack_signed signed_type) {
pack_signed_to_symbol(pm_pack_signed signed_type) {
switch (signed_type) {
case YP_PACK_UNSIGNED:
case PM_PACK_UNSIGNED:
return ID2SYM(rb_intern("UNSIGNED"));
case YP_PACK_SIGNED:
case PM_PACK_SIGNED:
return ID2SYM(rb_intern("SIGNED"));
case YP_PACK_SIGNED_NA:
case PM_PACK_SIGNED_NA:
return ID2SYM(rb_intern("SIGNED_NA"));
default:
return Qnil;
@ -84,17 +84,17 @@ pack_signed_to_symbol(yp_pack_signed signed_type) {
}
static VALUE
pack_endian_to_symbol(yp_pack_endian endian) {
pack_endian_to_symbol(pm_pack_endian endian) {
switch (endian) {
case YP_PACK_AGNOSTIC_ENDIAN:
case PM_PACK_AGNOSTIC_ENDIAN:
return ID2SYM(rb_intern("AGNOSTIC_ENDIAN"));
case YP_PACK_LITTLE_ENDIAN:
case PM_PACK_LITTLE_ENDIAN:
return ID2SYM(rb_intern("LITTLE_ENDIAN"));
case YP_PACK_BIG_ENDIAN:
case PM_PACK_BIG_ENDIAN:
return ID2SYM(rb_intern("BIG_ENDIAN"));
case YP_PACK_NATIVE_ENDIAN:
case PM_PACK_NATIVE_ENDIAN:
return ID2SYM(rb_intern("NATIVE_ENDIAN"));
case YP_PACK_ENDIAN_NA:
case PM_PACK_ENDIAN_NA:
return ID2SYM(rb_intern("ENDIAN_NA"));
default:
return Qnil;
@ -102,27 +102,27 @@ pack_endian_to_symbol(yp_pack_endian endian) {
}
static VALUE
pack_size_to_symbol(yp_pack_size size) {
pack_size_to_symbol(pm_pack_size size) {
switch (size) {
case YP_PACK_SIZE_SHORT:
case PM_PACK_SIZE_SHORT:
return ID2SYM(rb_intern("SIZE_SHORT"));
case YP_PACK_SIZE_INT:
case PM_PACK_SIZE_INT:
return ID2SYM(rb_intern("SIZE_INT"));
case YP_PACK_SIZE_LONG:
case PM_PACK_SIZE_LONG:
return ID2SYM(rb_intern("SIZE_LONG"));
case YP_PACK_SIZE_LONG_LONG:
case PM_PACK_SIZE_LONG_LONG:
return ID2SYM(rb_intern("SIZE_LONG_LONG"));
case YP_PACK_SIZE_8:
case PM_PACK_SIZE_8:
return ID2SYM(rb_intern("SIZE_8"));
case YP_PACK_SIZE_16:
case PM_PACK_SIZE_16:
return ID2SYM(rb_intern("SIZE_16"));
case YP_PACK_SIZE_32:
case PM_PACK_SIZE_32:
return ID2SYM(rb_intern("SIZE_32"));
case YP_PACK_SIZE_64:
case PM_PACK_SIZE_64:
return ID2SYM(rb_intern("SIZE_64"));
case YP_PACK_SIZE_P:
case PM_PACK_SIZE_P:
return ID2SYM(rb_intern("SIZE_P"));
case YP_PACK_SIZE_NA:
case PM_PACK_SIZE_NA:
return ID2SYM(rb_intern("SIZE_NA"));
default:
return Qnil;
@ -130,15 +130,15 @@ pack_size_to_symbol(yp_pack_size size) {
}
static VALUE
pack_length_type_to_symbol(yp_pack_length_type length_type) {
pack_length_type_to_symbol(pm_pack_length_type length_type) {
switch (length_type) {
case YP_PACK_LENGTH_FIXED:
case PM_PACK_LENGTH_FIXED:
return ID2SYM(rb_intern("LENGTH_FIXED"));
case YP_PACK_LENGTH_MAX:
case PM_PACK_LENGTH_MAX:
return ID2SYM(rb_intern("LENGTH_MAX"));
case YP_PACK_LENGTH_RELATIVE:
case PM_PACK_LENGTH_RELATIVE:
return ID2SYM(rb_intern("LENGTH_RELATIVE"));
case YP_PACK_LENGTH_NA:
case PM_PACK_LENGTH_NA:
return ID2SYM(rb_intern("LENGTH_NA"));
default:
return Qnil;
@ -146,16 +146,16 @@ pack_length_type_to_symbol(yp_pack_length_type length_type) {
}
static VALUE
pack_encoding_to_ruby(yp_pack_encoding encoding) {
pack_encoding_to_ruby(pm_pack_encoding encoding) {
int index;
switch (encoding) {
case YP_PACK_ENCODING_ASCII_8BIT:
case PM_PACK_ENCODING_ASCII_8BIT:
index = rb_ascii8bit_encindex();
break;
case YP_PACK_ENCODING_US_ASCII:
case PM_PACK_ENCODING_US_ASCII:
index = rb_usascii_encindex();
break;
case YP_PACK_ENCODING_UTF_8:
case PM_PACK_ENCODING_UTF_8:
index = rb_utf8_encindex();
break;
default:
@ -170,11 +170,11 @@ pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_
rb_raise(rb_eArgError, "invalid version");
}
yp_pack_variant variant;
pm_pack_variant variant;
if (variant_symbol == pack_symbol) {
variant = YP_PACK_VARIANT_PACK;
variant = PM_PACK_VARIANT_PACK;
} else if (variant_symbol == unpack_symbol) {
variant = YP_PACK_VARIANT_UNPACK;
variant = PM_PACK_VARIANT_UNPACK;
} else {
rb_raise(rb_eArgError, "invalid variant");
}
@ -183,43 +183,43 @@ pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_
const char *format = RSTRING_PTR(format_string);
const char *format_end = format + RSTRING_LEN(format_string);
yp_pack_encoding encoding = YP_PACK_ENCODING_START;
pm_pack_encoding encoding = PM_PACK_ENCODING_START;
VALUE directives_array = rb_ary_new();
while (format < format_end) {
yp_pack_type type;
yp_pack_signed signed_type;
yp_pack_endian endian;
yp_pack_size size;
yp_pack_length_type length_type;
pm_pack_type type;
pm_pack_signed signed_type;
pm_pack_endian endian;
pm_pack_size size;
pm_pack_length_type length_type;
uint64_t length;
const char *directive_start = format;
yp_pack_result parse_result = yp_pack_parse(variant, &format, format_end, &type, &signed_type, &endian,
pm_pack_result parse_result = pm_pack_parse(variant, &format, format_end, &type, &signed_type, &endian,
&size, &length_type, &length, &encoding);
const char *directive_end = format;
switch (parse_result) {
case YP_PACK_OK:
case PM_PACK_OK:
break;
case YP_PACK_ERROR_UNSUPPORTED_DIRECTIVE:
case PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE:
rb_raise(rb_eArgError, "unsupported directive");
case YP_PACK_ERROR_UNKNOWN_DIRECTIVE:
case PM_PACK_ERROR_UNKNOWN_DIRECTIVE:
rb_raise(rb_eArgError, "unsupported directive");
case YP_PACK_ERROR_LENGTH_TOO_BIG:
case PM_PACK_ERROR_LENGTH_TOO_BIG:
rb_raise(rb_eRangeError, "pack length too big");
case YP_PACK_ERROR_BANG_NOT_ALLOWED:
case PM_PACK_ERROR_BANG_NOT_ALLOWED:
rb_raise(rb_eRangeError, "bang not allowed");
case YP_PACK_ERROR_DOUBLE_ENDIAN:
case PM_PACK_ERROR_DOUBLE_ENDIAN:
rb_raise(rb_eRangeError, "double endian");
default:
rb_bug("parse result");
}
if (type == YP_PACK_END) {
if (type == PM_PACK_END) {
break;
}
@ -233,22 +233,22 @@ pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_
pack_length_type_to_symbol(length_type),
UINT64T2NUM(length) };
rb_ary_push(directives_array, rb_class_new_instance(9, directive_args, rb_cYARPPackDirective));
rb_ary_push(directives_array, rb_class_new_instance(9, directive_args, rb_cPrismPackDirective));
}
VALUE format_args[2];
format_args[0] = directives_array;
format_args[1] = pack_encoding_to_ruby(encoding);
return rb_class_new_instance(2, format_args, rb_cYARPPackFormat);
return rb_class_new_instance(2, format_args, rb_cPrismPackFormat);
}
void
Init_yarp_pack(void) {
rb_cYARP = rb_define_module("YARP");
rb_cYARPPack = rb_define_module_under(rb_cYARP, "Pack");
rb_cYARPPackDirective = rb_define_class_under(rb_cYARPPack, "Directive", rb_cObject);
rb_cYARPPackFormat = rb_define_class_under(rb_cYARPPack, "Format", rb_cObject);
rb_define_singleton_method(rb_cYARPPack, "parse", pack_parse, 3);
Init_prism_pack(void) {
rb_cPrism = rb_define_module("Prism");
rb_cPrismPack = rb_define_module_under(rb_cPrism, "Pack");
rb_cPrismPackDirective = rb_define_class_under(rb_cPrismPack, "Directive", rb_cObject);
rb_cPrismPackFormat = rb_define_class_under(rb_cPrismPack, "Format", rb_cObject);
rb_define_singleton_method(rb_cPrismPack, "parse", pack_parse, 3);
v3_2_0_symbol = ID2SYM(rb_intern("v3_2_0"));
pack_symbol = ID2SYM(rb_intern("pack"));

Просмотреть файл

@ -1772,7 +1772,7 @@ nodes:
type: location
- name: content_loc
type: location
semantic_field: true # https://github.com/ruby/yarp/issues/1452
semantic_field: true # https://github.com/ruby/prism/issues/1452
- name: closing_loc
type: location
- name: unescaped
@ -2093,7 +2093,7 @@ nodes:
type: location
- name: content_loc
type: location
semantic_field: true # https://github.com/ruby/yarp/issues/1452
semantic_field: true # https://github.com/ruby/prism/issues/1452
- name: closing_loc
type: location
- name: unescaped
@ -2287,10 +2287,10 @@ nodes:
kind: StringFlags
- name: opening_loc
type: location?
semantic_field: true # https://github.com/ruby/yarp/issues/1452
semantic_field: true # https://github.com/ruby/prism/issues/1452
- name: content_loc
type: location
semantic_field: true # https://github.com/ruby/yarp/issues/1452
semantic_field: true # https://github.com/ruby/prism/issues/1452
- name: closing_loc
type: location?
- name: unescaped

Просмотреть файл

@ -1,7 +1,7 @@
#ifndef YARP_DEFINES_H
#define YARP_DEFINES_H
#ifndef PRISM_DEFINES_H
#define PRISM_DEFINES_H
// This file should be included first by any *.h or *.c in YARP
// This file should be included first by any *.h or *.c in prism
#include <ctype.h>
#include <stdarg.h>
@ -10,24 +10,24 @@
#include <stdio.h>
#include <string.h>
// YP_EXPORTED_FUNCTION
#ifndef YP_EXPORTED_FUNCTION
# ifdef YP_EXPORT_SYMBOLS
// PRISM_EXPORTED_FUNCTION
#ifndef PRISM_EXPORTED_FUNCTION
# ifdef PRISM_EXPORT_SYMBOLS
# ifdef _WIN32
# define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
# define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
# else
# define YP_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
# define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
# endif
# else
# define YP_EXPORTED_FUNCTION
# define PRISM_EXPORTED_FUNCTION
# endif
#endif
// YP_ATTRIBUTE_UNUSED
// PRISM_ATTRIBUTE_UNUSED
#if defined(__GNUC__)
# define YP_ATTRIBUTE_UNUSED __attribute__((unused))
# define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
#else
# define YP_ATTRIBUTE_UNUSED
# define PRISM_ATTRIBUTE_UNUSED
#endif
// inline
@ -40,6 +40,6 @@
# define snprintf _snprintf
#endif
int yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
#endif

Просмотреть файл

@ -1,4 +1,4 @@
#include "yarp/diagnostic.h"
#include "prism/diagnostic.h"
/*
## Message composition
@ -39,7 +39,7 @@
- e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
## Error names (YP_ERR_*)
## Error names (PM_ERR_*)
- When appropriate, prefer node name to token name.
- e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
@ -48,213 +48,213 @@
- Try to order the words in the name from more general to more specific,
- e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
- When in doubt, look for similar patterns and name them so that they are grouped when lexically
sorted. See YP_ERR_ARGUMENT_NO_FORWARDING_* for an example.
sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
*/
static const char* const diagnostic_messages[YP_DIAGNOSTIC_ID_LEN] = {
[YP_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
[YP_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
[YP_ERR_ARGUMENT_AFTER_BLOCK] = "Unexpected argument after a block argument",
[YP_ERR_ARGUMENT_BARE_HASH] = "Unexpected bare hash argument",
[YP_ERR_ARGUMENT_BLOCK_MULTI] = "Multiple block arguments; only one block is allowed",
[YP_ERR_ARGUMENT_FORMAL_CLASS] = "Invalid formal argument; formal argument cannot be a class variable",
[YP_ERR_ARGUMENT_FORMAL_CONSTANT] = "Invalid formal argument; formal argument cannot be a constant",
[YP_ERR_ARGUMENT_FORMAL_GLOBAL] = "Invalid formal argument; formal argument cannot be a global variable",
[YP_ERR_ARGUMENT_FORMAL_IVAR] = "Invalid formal argument; formal argument cannot be an instance variable",
[YP_ERR_ARGUMENT_NO_FORWARDING_AMP] = "Unexpected `&` when the parent method is not forwarding",
[YP_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES] = "Unexpected `...` when the parent method is not forwarding",
[YP_ERR_ARGUMENT_NO_FORWARDING_STAR] = "Unexpected `*` when the parent method is not forwarding",
[YP_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT] = "Unexpected `*` splat argument after a `**` keyword splat argument",
[YP_ERR_ARGUMENT_SPLAT_AFTER_SPLAT] = "Unexpected `*` splat argument after a `*` splat argument",
[YP_ERR_ARGUMENT_TERM_PAREN] = "Expected a `)` to close the arguments",
[YP_ERR_ARGUMENT_UNEXPECTED_BLOCK] = "Unexpected `{` after a method call without parenthesis",
[YP_ERR_ARRAY_ELEMENT] = "Expected an element for the array",
[YP_ERR_ARRAY_EXPRESSION] = "Expected an expression for the array element",
[YP_ERR_ARRAY_EXPRESSION_AFTER_STAR] = "Expected an expression after `*` in the array",
[YP_ERR_ARRAY_SEPARATOR] = "Expected a `,` separator for the array elements",
[YP_ERR_ARRAY_TERM] = "Expected a `]` to close the array",
[YP_ERR_BEGIN_LONELY_ELSE] = "Unexpected `else` in `begin` block; a `rescue` clause must precede `else`",
[YP_ERR_BEGIN_TERM] = "Expected an `end` to close the `begin` statement",
[YP_ERR_BEGIN_UPCASE_BRACE] = "Expected a `{` after `BEGIN`",
[YP_ERR_BEGIN_UPCASE_TERM] = "Expected a `}` to close the `BEGIN` statement",
[YP_ERR_BEGIN_UPCASE_TOPLEVEL] = "BEGIN is permitted only at toplevel",
[YP_ERR_BLOCK_PARAM_LOCAL_VARIABLE] = "Expected a local variable name in the block parameters",
[YP_ERR_BLOCK_PARAM_PIPE_TERM] = "Expected the block parameters to end with `|`",
[YP_ERR_BLOCK_TERM_BRACE] = "Expected a block beginning with `{` to end with `}`",
[YP_ERR_BLOCK_TERM_END] = "Expected a block beginning with `do` to end with `end`",
[YP_ERR_CANNOT_PARSE_EXPRESSION] = "Cannot parse the expression",
[YP_ERR_CANNOT_PARSE_STRING_PART] = "Cannot parse the string part",
[YP_ERR_CASE_EXPRESSION_AFTER_CASE] = "Expected an expression after `case`",
[YP_ERR_CASE_EXPRESSION_AFTER_WHEN] = "Expected an expression after `when`",
[YP_ERR_CASE_MISSING_CONDITIONS] = "Expected a `when` or `in` clause after `case`",
[YP_ERR_CASE_TERM] = "Expected an `end` to close the `case` statement",
[YP_ERR_CLASS_IN_METHOD] = "Unexpected class definition in a method body",
[YP_ERR_CLASS_NAME] = "Expected a constant name after `class`",
[YP_ERR_CLASS_SUPERCLASS] = "Expected a superclass after `<`",
[YP_ERR_CLASS_TERM] = "Expected an `end` to close the `class` statement",
[YP_ERR_CONDITIONAL_ELSIF_PREDICATE] = "Expected a predicate expression for the `elsif` statement",
[YP_ERR_CONDITIONAL_IF_PREDICATE] = "Expected a predicate expression for the `if` statement",
[YP_ERR_CONDITIONAL_PREDICATE_TERM] = "Expected `then` or `;` or '\n'",
[YP_ERR_CONDITIONAL_TERM] = "Expected an `end` to close the conditional clause",
[YP_ERR_CONDITIONAL_TERM_ELSE] = "Expected an `end` to close the `else` clause",
[YP_ERR_CONDITIONAL_UNLESS_PREDICATE] = "Expected a predicate expression for the `unless` statement",
[YP_ERR_CONDITIONAL_UNTIL_PREDICATE] = "Expected a predicate expression for the `until` statement",
[YP_ERR_CONDITIONAL_WHILE_PREDICATE] = "Expected a predicate expression for the `while` statement",
[YP_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = "Expected a constant after the `::` operator",
[YP_ERR_DEF_ENDLESS] = "Could not parse the endless method body",
[YP_ERR_DEF_ENDLESS_SETTER] = "Invalid method name; a setter method cannot be defined in an endless method definition",
[YP_ERR_DEF_NAME] = "Expected a method name",
[YP_ERR_DEF_NAME_AFTER_RECEIVER] = "Expected a method name after the receiver",
[YP_ERR_DEF_PARAMS_TERM] = "Expected a delimiter to close the parameters",
[YP_ERR_DEF_PARAMS_TERM_PAREN] = "Expected a `)` to close the parameters",
[YP_ERR_DEF_RECEIVER] = "Expected a receiver for the method definition",
[YP_ERR_DEF_RECEIVER_TERM] = "Expected a `.` or `::` after the receiver in a method definition",
[YP_ERR_DEF_TERM] = "Expected an `end` to close the `def` statement",
[YP_ERR_DEFINED_EXPRESSION] = "Expected an expression after `defined?`",
[YP_ERR_EMBDOC_TERM] = "Could not find a terminator for the embedded document",
[YP_ERR_EMBEXPR_END] = "Expected a `}` to close the embedded expression",
[YP_ERR_EMBVAR_INVALID] = "Invalid embedded variable",
[YP_ERR_END_UPCASE_BRACE] = "Expected a `{` after `END`",
[YP_ERR_END_UPCASE_TERM] = "Expected a `}` to close the `END` statement",
[YP_ERR_ESCAPE_INVALID_CONTROL] = "Invalid control escape sequence",
[YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT] = "Invalid control escape sequence; control cannot be repeated",
[YP_ERR_ESCAPE_INVALID_HEXADECIMAL] = "Invalid hexadecimal escape sequence",
[YP_ERR_ESCAPE_INVALID_META] = "Invalid meta escape sequence",
[YP_ERR_ESCAPE_INVALID_META_REPEAT] = "Invalid meta escape sequence; meta cannot be repeated",
[YP_ERR_ESCAPE_INVALID_UNICODE] = "Invalid Unicode escape sequence",
[YP_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = "Invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags",
[YP_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = "Invalid Unicode escape sequence; multiple codepoints are not allowed in a character literal",
[YP_ERR_ESCAPE_INVALID_UNICODE_LONG] = "Invalid Unicode escape sequence; maximum length is 6 digits",
[YP_ERR_ESCAPE_INVALID_UNICODE_TERM] = "Invalid Unicode escape sequence; needs closing `}`",
[YP_ERR_EXPECT_ARGUMENT] = "Expected an argument",
[YP_ERR_EXPECT_EOL_AFTER_STATEMENT] = "Expected a newline or semicolon after the statement",
[YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = "Expected an expression after `&&=`",
[YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = "Expected an expression after `||=`",
[YP_ERR_EXPECT_EXPRESSION_AFTER_COMMA] = "Expected an expression after `,`",
[YP_ERR_EXPECT_EXPRESSION_AFTER_EQUAL] = "Expected an expression after `=`",
[YP_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS] = "Expected an expression after `<<`",
[YP_ERR_EXPECT_EXPRESSION_AFTER_LPAREN] = "Expected an expression after `(`",
[YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR] = "Expected an expression after the operator",
[YP_ERR_EXPECT_EXPRESSION_AFTER_SPLAT] = "Expected an expression after `*` splat in an argument",
[YP_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH] = "Expected an expression after `**` in a hash",
[YP_ERR_EXPECT_EXPRESSION_AFTER_STAR] = "Expected an expression after `*`",
[YP_ERR_EXPECT_IDENT_REQ_PARAMETER] = "Expected an identifier for the required parameter",
[YP_ERR_EXPECT_LPAREN_REQ_PARAMETER] = "Expected a `(` to start a required parameter",
[YP_ERR_EXPECT_RBRACKET] = "Expected a matching `]`",
[YP_ERR_EXPECT_RPAREN] = "Expected a matching `)`",
[YP_ERR_EXPECT_RPAREN_AFTER_MULTI] = "Expected a `)` after multiple assignment",
[YP_ERR_EXPECT_RPAREN_REQ_PARAMETER] = "Expected a `)` to end a required parameter",
[YP_ERR_EXPECT_STRING_CONTENT] = "Expected string content after opening string delimiter",
[YP_ERR_EXPECT_WHEN_DELIMITER] = "Expected a delimiter after the predicates of a `when` clause",
[YP_ERR_EXPRESSION_BARE_HASH] = "Unexpected bare hash in expression",
[YP_ERR_FOR_COLLECTION] = "Expected a collection after the `in` in a `for` statement",
[YP_ERR_FOR_INDEX] = "Expected an index after `for`",
[YP_ERR_FOR_IN] = "Expected an `in` after the index in a `for` statement",
[YP_ERR_FOR_TERM] = "Expected an `end` to close the `for` loop",
[YP_ERR_HASH_EXPRESSION_AFTER_LABEL] = "Expected an expression after the label in a hash",
[YP_ERR_HASH_KEY] = "Expected a key in the hash literal",
[YP_ERR_HASH_ROCKET] = "Expected a `=>` between the hash key and value",
[YP_ERR_HASH_TERM] = "Expected a `}` to close the hash literal",
[YP_ERR_HASH_VALUE] = "Expected a value in the hash literal",
[YP_ERR_HEREDOC_TERM] = "Could not find a terminator for the heredoc",
[YP_ERR_INCOMPLETE_QUESTION_MARK] = "Incomplete expression at `?`",
[YP_ERR_INCOMPLETE_VARIABLE_CLASS] = "Incomplete class variable",
[YP_ERR_INCOMPLETE_VARIABLE_INSTANCE] = "Incomplete instance variable",
[YP_ERR_INVALID_ENCODING_MAGIC_COMMENT] = "Unknown or invalid encoding in the magic comment",
[YP_ERR_INVALID_FLOAT_EXPONENT] = "Invalid exponent",
[YP_ERR_INVALID_NUMBER_BINARY] = "Invalid binary number",
[YP_ERR_INVALID_NUMBER_DECIMAL] = "Invalid decimal number",
[YP_ERR_INVALID_NUMBER_HEXADECIMAL] = "Invalid hexadecimal number",
[YP_ERR_INVALID_NUMBER_OCTAL] = "Invalid octal number",
[YP_ERR_INVALID_NUMBER_UNDERSCORE] = "Invalid underscore placement in number",
[YP_ERR_INVALID_PERCENT] = "Invalid `%` token", // TODO WHAT?
[YP_ERR_INVALID_TOKEN] = "Invalid token", // TODO WHAT?
[YP_ERR_INVALID_VARIABLE_GLOBAL] = "Invalid global variable",
[YP_ERR_LAMBDA_OPEN] = "Expected a `do` keyword or a `{` to open the lambda block",
[YP_ERR_LAMBDA_TERM_BRACE] = "Expected a lambda block beginning with `{` to end with `}`",
[YP_ERR_LAMBDA_TERM_END] = "Expected a lambda block beginning with `do` to end with `end`",
[YP_ERR_LIST_I_LOWER_ELEMENT] = "Expected a symbol in a `%i` list",
[YP_ERR_LIST_I_LOWER_TERM] = "Expected a closing delimiter for the `%i` list",
[YP_ERR_LIST_I_UPPER_ELEMENT] = "Expected a symbol in a `%I` list",
[YP_ERR_LIST_I_UPPER_TERM] = "Expected a closing delimiter for the `%I` list",
[YP_ERR_LIST_W_LOWER_ELEMENT] = "Expected a string in a `%w` list",
[YP_ERR_LIST_W_LOWER_TERM] = "Expected a closing delimiter for the `%w` list",
[YP_ERR_LIST_W_UPPER_ELEMENT] = "Expected a string in a `%W` list",
[YP_ERR_LIST_W_UPPER_TERM] = "Expected a closing delimiter for the `%W` list",
[YP_ERR_MALLOC_FAILED] = "Failed to allocate memory",
[YP_ERR_MODULE_IN_METHOD] = "Unexpected module definition in a method body",
[YP_ERR_MODULE_NAME] = "Expected a constant name after `module`",
[YP_ERR_MODULE_TERM] = "Expected an `end` to close the `module` statement",
[YP_ERR_MULTI_ASSIGN_MULTI_SPLATS] = "Multiple splats in multiple assignment",
[YP_ERR_NOT_EXPRESSION] = "Expected an expression after `not`",
[YP_ERR_NUMBER_LITERAL_UNDERSCORE] = "Number literal ending with a `_`",
[YP_ERR_NUMBERED_PARAMETER_NOT_ALLOWED] = "Numbered parameters are not allowed alongside explicit parameters",
[YP_ERR_NUMBERED_PARAMETER_OUTER_SCOPE] = "Numbered parameter is already used in outer scope",
[YP_ERR_OPERATOR_MULTI_ASSIGN] = "Unexpected operator for a multiple assignment",
[YP_ERR_OPERATOR_WRITE_BLOCK] = "Unexpected operator after a call with a block",
[YP_ERR_PARAMETER_ASSOC_SPLAT_MULTI] = "Unexpected multiple `**` splat parameters",
[YP_ERR_PARAMETER_BLOCK_MULTI] = "Multiple block parameters; only one block is allowed",
[YP_ERR_PARAMETER_METHOD_NAME] = "Unexpected name for a parameter",
[YP_ERR_PARAMETER_NAME_REPEAT] = "Repeated parameter name",
[YP_ERR_PARAMETER_NO_DEFAULT] = "Expected a default value for the parameter",
[YP_ERR_PARAMETER_NO_DEFAULT_KW] = "Expected a default value for the keyword parameter",
[YP_ERR_PARAMETER_NUMBERED_RESERVED] = "Token reserved for a numbered parameter",
[YP_ERR_PARAMETER_ORDER] = "Unexpected parameter order",
[YP_ERR_PARAMETER_SPLAT_MULTI] = "Unexpected multiple `*` splat parameters",
[YP_ERR_PARAMETER_STAR] = "Unexpected parameter `*`",
[YP_ERR_PARAMETER_WILD_LOOSE_COMMA] = "Unexpected `,` in parameters",
[YP_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = "Expected a pattern expression after the `[` operator",
[YP_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = "Expected a pattern expression after `,`",
[YP_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = "Expected a pattern expression after `=>`",
[YP_ERR_PATTERN_EXPRESSION_AFTER_IN] = "Expected a pattern expression after the `in` keyword",
[YP_ERR_PATTERN_EXPRESSION_AFTER_KEY] = "Expected a pattern expression after the key",
[YP_ERR_PATTERN_EXPRESSION_AFTER_PAREN] = "Expected a pattern expression after the `(` operator",
[YP_ERR_PATTERN_EXPRESSION_AFTER_PIN] = "Expected a pattern expression after the `^` pin operator",
[YP_ERR_PATTERN_EXPRESSION_AFTER_PIPE] = "Expected a pattern expression after the `|` operator",
[YP_ERR_PATTERN_EXPRESSION_AFTER_RANGE] = "Expected a pattern expression after the range operator",
[YP_ERR_PATTERN_HASH_KEY] = "Expected a key in the hash pattern",
[YP_ERR_PATTERN_HASH_KEY_LABEL] = "Expected a label as the key in the hash pattern", // TODO // THIS // AND // ABOVE // IS WEIRD
[YP_ERR_PATTERN_IDENT_AFTER_HROCKET] = "Expected an identifier after the `=>` operator",
[YP_ERR_PATTERN_LABEL_AFTER_COMMA] = "Expected a label after the `,` in the hash pattern",
[YP_ERR_PATTERN_REST] = "Unexpected rest pattern",
[YP_ERR_PATTERN_TERM_BRACE] = "Expected a `}` to close the pattern expression",
[YP_ERR_PATTERN_TERM_BRACKET] = "Expected a `]` to close the pattern expression",
[YP_ERR_PATTERN_TERM_PAREN] = "Expected a `)` to close the pattern expression",
[YP_ERR_PIPEPIPEEQ_MULTI_ASSIGN] = "Unexpected `||=` in a multiple assignment",
[YP_ERR_REGEXP_TERM] = "Expected a closing delimiter for the regular expression",
[YP_ERR_RESCUE_EXPRESSION] = "Expected a rescued expression",
[YP_ERR_RESCUE_MODIFIER_VALUE] = "Expected a value after the `rescue` modifier",
[YP_ERR_RESCUE_TERM] = "Expected a closing delimiter for the `rescue` clause",
[YP_ERR_RESCUE_VARIABLE] = "Expected an exception variable after `=>` in a rescue statement",
[YP_ERR_RETURN_INVALID] = "Invalid `return` in a class or module body",
[YP_ERR_STRING_CONCATENATION] = "Expected a string for concatenation",
[YP_ERR_STRING_INTERPOLATED_TERM] = "Expected a closing delimiter for the interpolated string",
[YP_ERR_STRING_LITERAL_TERM] = "Expected a closing delimiter for the string literal",
[YP_ERR_SYMBOL_INVALID] = "Invalid symbol", // TODO expected symbol? yarp.c ~9719
[YP_ERR_SYMBOL_TERM_DYNAMIC] = "Expected a closing delimiter for the dynamic symbol",
[YP_ERR_SYMBOL_TERM_INTERPOLATED] = "Expected a closing delimiter for the interpolated symbol",
[YP_ERR_TERNARY_COLON] = "Expected a `:` after the true expression of a ternary operator",
[YP_ERR_TERNARY_EXPRESSION_FALSE] = "Expected an expression after `:` in the ternary operator",
[YP_ERR_TERNARY_EXPRESSION_TRUE] = "Expected an expression after `?` in the ternary operator",
[YP_ERR_UNDEF_ARGUMENT] = "Invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument",
[YP_ERR_UNARY_RECEIVER_BANG] = "Expected a receiver for unary `!`",
[YP_ERR_UNARY_RECEIVER_MINUS] = "Expected a receiver for unary `-`",
[YP_ERR_UNARY_RECEIVER_PLUS] = "Expected a receiver for unary `+`",
[YP_ERR_UNARY_RECEIVER_TILDE] = "Expected a receiver for unary `~`",
[YP_ERR_UNTIL_TERM] = "Expected an `end` to close the `until` statement",
[YP_ERR_WHILE_TERM] = "Expected an `end` to close the `while` statement",
[YP_ERR_WRITE_TARGET_READONLY] = "Immutable variable as a write target",
[YP_ERR_WRITE_TARGET_UNEXPECTED] = "Unexpected write target",
[YP_ERR_XSTRING_TERM] = "Expected a closing delimiter for the `%x` or backtick string",
[YP_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS] = "Ambiguous first argument; put parentheses or a space even after `-` operator",
[YP_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = "Ambiguous first argument; put parentheses or a space even after `+` operator",
[YP_WARN_AMBIGUOUS_PREFIX_STAR] = "Ambiguous `*` has been interpreted as an argument prefix",
[YP_WARN_AMBIGUOUS_SLASH] = "Ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator",
static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
[PM_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
[PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
[PM_ERR_ARGUMENT_AFTER_BLOCK] = "Unexpected argument after a block argument",
[PM_ERR_ARGUMENT_BARE_HASH] = "Unexpected bare hash argument",
[PM_ERR_ARGUMENT_BLOCK_MULTI] = "Multiple block arguments; only one block is allowed",
[PM_ERR_ARGUMENT_FORMAL_CLASS] = "Invalid formal argument; formal argument cannot be a class variable",
[PM_ERR_ARGUMENT_FORMAL_CONSTANT] = "Invalid formal argument; formal argument cannot be a constant",
[PM_ERR_ARGUMENT_FORMAL_GLOBAL] = "Invalid formal argument; formal argument cannot be a global variable",
[PM_ERR_ARGUMENT_FORMAL_IVAR] = "Invalid formal argument; formal argument cannot be an instance variable",
[PM_ERR_ARGUMENT_NO_FORWARDING_AMP] = "Unexpected `&` when the parent method is not forwarding",
[PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES] = "Unexpected `...` when the parent method is not forwarding",
[PM_ERR_ARGUMENT_NO_FORWARDING_STAR] = "Unexpected `*` when the parent method is not forwarding",
[PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT] = "Unexpected `*` splat argument after a `**` keyword splat argument",
[PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT] = "Unexpected `*` splat argument after a `*` splat argument",
[PM_ERR_ARGUMENT_TERM_PAREN] = "Expected a `)` to close the arguments",
[PM_ERR_ARGUMENT_UNEXPECTED_BLOCK] = "Unexpected `{` after a method call without parenthesis",
[PM_ERR_ARRAY_ELEMENT] = "Expected an element for the array",
[PM_ERR_ARRAY_EXPRESSION] = "Expected an expression for the array element",
[PM_ERR_ARRAY_EXPRESSION_AFTER_STAR] = "Expected an expression after `*` in the array",
[PM_ERR_ARRAY_SEPARATOR] = "Expected a `,` separator for the array elements",
[PM_ERR_ARRAY_TERM] = "Expected a `]` to close the array",
[PM_ERR_BEGIN_LONELY_ELSE] = "Unexpected `else` in `begin` block; a `rescue` clause must precede `else`",
[PM_ERR_BEGIN_TERM] = "Expected an `end` to close the `begin` statement",
[PM_ERR_BEGIN_UPCASE_BRACE] = "Expected a `{` after `BEGIN`",
[PM_ERR_BEGIN_UPCASE_TERM] = "Expected a `}` to close the `BEGIN` statement",
[PM_ERR_BEGIN_UPCASE_TOPLEVEL] = "BEGIN is permitted only at toplevel",
[PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE] = "Expected a local variable name in the block parameters",
[PM_ERR_BLOCK_PARAM_PIPE_TERM] = "Expected the block parameters to end with `|`",
[PM_ERR_BLOCK_TERM_BRACE] = "Expected a block beginning with `{` to end with `}`",
[PM_ERR_BLOCK_TERM_END] = "Expected a block beginning with `do` to end with `end`",
[PM_ERR_CANNOT_PARSE_EXPRESSION] = "Cannot parse the expression",
[PM_ERR_CANNOT_PARSE_STRING_PART] = "Cannot parse the string part",
[PM_ERR_CASE_EXPRESSION_AFTER_CASE] = "Expected an expression after `case`",
[PM_ERR_CASE_EXPRESSION_AFTER_WHEN] = "Expected an expression after `when`",
[PM_ERR_CASE_MISSING_CONDITIONS] = "Expected a `when` or `in` clause after `case`",
[PM_ERR_CASE_TERM] = "Expected an `end` to close the `case` statement",
[PM_ERR_CLASS_IN_METHOD] = "Unexpected class definition in a method body",
[PM_ERR_CLASS_NAME] = "Expected a constant name after `class`",
[PM_ERR_CLASS_SUPERCLASS] = "Expected a superclass after `<`",
[PM_ERR_CLASS_TERM] = "Expected an `end` to close the `class` statement",
[PM_ERR_CONDITIONAL_ELSIF_PREDICATE] = "Expected a predicate expression for the `elsif` statement",
[PM_ERR_CONDITIONAL_IF_PREDICATE] = "Expected a predicate expression for the `if` statement",
[PM_ERR_CONDITIONAL_PREDICATE_TERM] = "Expected `then` or `;` or '\n'",
[PM_ERR_CONDITIONAL_TERM] = "Expected an `end` to close the conditional clause",
[PM_ERR_CONDITIONAL_TERM_ELSE] = "Expected an `end` to close the `else` clause",
[PM_ERR_CONDITIONAL_UNLESS_PREDICATE] = "Expected a predicate expression for the `unless` statement",
[PM_ERR_CONDITIONAL_UNTIL_PREDICATE] = "Expected a predicate expression for the `until` statement",
[PM_ERR_CONDITIONAL_WHILE_PREDICATE] = "Expected a predicate expression for the `while` statement",
[PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = "Expected a constant after the `::` operator",
[PM_ERR_DEF_ENDLESS] = "Could not parse the endless method body",
[PM_ERR_DEF_ENDLESS_SETTER] = "Invalid method name; a setter method cannot be defined in an endless method definition",
[PM_ERR_DEF_NAME] = "Expected a method name",
[PM_ERR_DEF_NAME_AFTER_RECEIVER] = "Expected a method name after the receiver",
[PM_ERR_DEF_PARAMS_TERM] = "Expected a delimiter to close the parameters",
[PM_ERR_DEF_PARAMS_TERM_PAREN] = "Expected a `)` to close the parameters",
[PM_ERR_DEF_RECEIVER] = "Expected a receiver for the method definition",
[PM_ERR_DEF_RECEIVER_TERM] = "Expected a `.` or `::` after the receiver in a method definition",
[PM_ERR_DEF_TERM] = "Expected an `end` to close the `def` statement",
[PM_ERR_DEFINED_EXPRESSION] = "Expected an expression after `defined?`",
[PM_ERR_EMBDOC_TERM] = "Could not find a terminator for the embedded document",
[PM_ERR_EMBEXPR_END] = "Expected a `}` to close the embedded expression",
[PM_ERR_EMBVAR_INVALID] = "Invalid embedded variable",
[PM_ERR_END_UPCASE_BRACE] = "Expected a `{` after `END`",
[PM_ERR_END_UPCASE_TERM] = "Expected a `}` to close the `END` statement",
[PM_ERR_ESCAPE_INVALID_CONTROL] = "Invalid control escape sequence",
[PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT] = "Invalid control escape sequence; control cannot be repeated",
[PM_ERR_ESCAPE_INVALID_HEXADECIMAL] = "Invalid hexadecimal escape sequence",
[PM_ERR_ESCAPE_INVALID_META] = "Invalid meta escape sequence",
[PM_ERR_ESCAPE_INVALID_META_REPEAT] = "Invalid meta escape sequence; meta cannot be repeated",
[PM_ERR_ESCAPE_INVALID_UNICODE] = "Invalid Unicode escape sequence",
[PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = "Invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags",
[PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = "Invalid Unicode escape sequence; multiple codepoints are not allowed in a character literal",
[PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = "Invalid Unicode escape sequence; maximum length is 6 digits",
[PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = "Invalid Unicode escape sequence; needs closing `}`",
[PM_ERR_EXPECT_ARGUMENT] = "Expected an argument",
[PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = "Expected a newline or semicolon after the statement",
[PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = "Expected an expression after `&&=`",
[PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = "Expected an expression after `||=`",
[PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA] = "Expected an expression after `,`",
[PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL] = "Expected an expression after `=`",
[PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS] = "Expected an expression after `<<`",
[PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN] = "Expected an expression after `(`",
[PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR] = "Expected an expression after the operator",
[PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT] = "Expected an expression after `*` splat in an argument",
[PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH] = "Expected an expression after `**` in a hash",
[PM_ERR_EXPECT_EXPRESSION_AFTER_STAR] = "Expected an expression after `*`",
[PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = "Expected an identifier for the required parameter",
[PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = "Expected a `(` to start a required parameter",
[PM_ERR_EXPECT_RBRACKET] = "Expected a matching `]`",
[PM_ERR_EXPECT_RPAREN] = "Expected a matching `)`",
[PM_ERR_EXPECT_RPAREN_AFTER_MULTI] = "Expected a `)` after multiple assignment",
[PM_ERR_EXPECT_RPAREN_REQ_PARAMETER] = "Expected a `)` to end a required parameter",
[PM_ERR_EXPECT_STRING_CONTENT] = "Expected string content after opening string delimiter",
[PM_ERR_EXPECT_WHEN_DELIMITER] = "Expected a delimiter after the predicates of a `when` clause",
[PM_ERR_EXPRESSION_BARE_HASH] = "Unexpected bare hash in expression",
[PM_ERR_FOR_COLLECTION] = "Expected a collection after the `in` in a `for` statement",
[PM_ERR_FOR_INDEX] = "Expected an index after `for`",
[PM_ERR_FOR_IN] = "Expected an `in` after the index in a `for` statement",
[PM_ERR_FOR_TERM] = "Expected an `end` to close the `for` loop",
[PM_ERR_HASH_EXPRESSION_AFTER_LABEL] = "Expected an expression after the label in a hash",
[PM_ERR_HASH_KEY] = "Expected a key in the hash literal",
[PM_ERR_HASH_ROCKET] = "Expected a `=>` between the hash key and value",
[PM_ERR_HASH_TERM] = "Expected a `}` to close the hash literal",
[PM_ERR_HASH_VALUE] = "Expected a value in the hash literal",
[PM_ERR_HEREDOC_TERM] = "Could not find a terminator for the heredoc",
[PM_ERR_INCOMPLETE_QUESTION_MARK] = "Incomplete expression at `?`",
[PM_ERR_INCOMPLETE_VARIABLE_CLASS] = "Incomplete class variable",
[PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = "Incomplete instance variable",
[PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = "Unknown or invalid encoding in the magic comment",
[PM_ERR_INVALID_FLOAT_EXPONENT] = "Invalid exponent",
[PM_ERR_INVALID_NUMBER_BINARY] = "Invalid binary number",
[PM_ERR_INVALID_NUMBER_DECIMAL] = "Invalid decimal number",
[PM_ERR_INVALID_NUMBER_HEXADECIMAL] = "Invalid hexadecimal number",
[PM_ERR_INVALID_NUMBER_OCTAL] = "Invalid octal number",
[PM_ERR_INVALID_NUMBER_UNDERSCORE] = "Invalid underscore placement in number",
[PM_ERR_INVALID_PERCENT] = "Invalid `%` token", // TODO WHAT?
[PM_ERR_INVALID_TOKEN] = "Invalid token", // TODO WHAT?
[PM_ERR_INVALID_VARIABLE_GLOBAL] = "Invalid global variable",
[PM_ERR_LAMBDA_OPEN] = "Expected a `do` keyword or a `{` to open the lambda block",
[PM_ERR_LAMBDA_TERM_BRACE] = "Expected a lambda block beginning with `{` to end with `}`",
[PM_ERR_LAMBDA_TERM_END] = "Expected a lambda block beginning with `do` to end with `end`",
[PM_ERR_LIST_I_LOWER_ELEMENT] = "Expected a symbol in a `%i` list",
[PM_ERR_LIST_I_LOWER_TERM] = "Expected a closing delimiter for the `%i` list",
[PM_ERR_LIST_I_UPPER_ELEMENT] = "Expected a symbol in a `%I` list",
[PM_ERR_LIST_I_UPPER_TERM] = "Expected a closing delimiter for the `%I` list",
[PM_ERR_LIST_W_LOWER_ELEMENT] = "Expected a string in a `%w` list",
[PM_ERR_LIST_W_LOWER_TERM] = "Expected a closing delimiter for the `%w` list",
[PM_ERR_LIST_W_UPPER_ELEMENT] = "Expected a string in a `%W` list",
[PM_ERR_LIST_W_UPPER_TERM] = "Expected a closing delimiter for the `%W` list",
[PM_ERR_MALLOC_FAILED] = "Failed to allocate memory",
[PM_ERR_MODULE_IN_METHOD] = "Unexpected module definition in a method body",
[PM_ERR_MODULE_NAME] = "Expected a constant name after `module`",
[PM_ERR_MODULE_TERM] = "Expected an `end` to close the `module` statement",
[PM_ERR_MULTI_ASSIGN_MULTI_SPLATS] = "Multiple splats in multiple assignment",
[PM_ERR_NOT_EXPRESSION] = "Expected an expression after `not`",
[PM_ERR_NUMBER_LITERAL_UNDERSCORE] = "Number literal ending with a `_`",
[PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED] = "Numbered parameters are not allowed alongside explicit parameters",
[PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE] = "Numbered parameter is already used in outer scope",
[PM_ERR_OPERATOR_MULTI_ASSIGN] = "Unexpected operator for a multiple assignment",
[PM_ERR_OPERATOR_WRITE_BLOCK] = "Unexpected operator after a call with a block",
[PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI] = "Unexpected multiple `**` splat parameters",
[PM_ERR_PARAMETER_BLOCK_MULTI] = "Multiple block parameters; only one block is allowed",
[PM_ERR_PARAMETER_METHOD_NAME] = "Unexpected name for a parameter",
[PM_ERR_PARAMETER_NAME_REPEAT] = "Repeated parameter name",
[PM_ERR_PARAMETER_NO_DEFAULT] = "Expected a default value for the parameter",
[PM_ERR_PARAMETER_NO_DEFAULT_KW] = "Expected a default value for the keyword parameter",
[PM_ERR_PARAMETER_NUMBERED_RESERVED] = "Token reserved for a numbered parameter",
[PM_ERR_PARAMETER_ORDER] = "Unexpected parameter order",
[PM_ERR_PARAMETER_SPLAT_MULTI] = "Unexpected multiple `*` splat parameters",
[PM_ERR_PARAMETER_STAR] = "Unexpected parameter `*`",
[PM_ERR_PARAMETER_WILD_LOOSE_COMMA] = "Unexpected `,` in parameters",
[PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = "Expected a pattern expression after the `[` operator",
[PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = "Expected a pattern expression after `,`",
[PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = "Expected a pattern expression after `=>`",
[PM_ERR_PATTERN_EXPRESSION_AFTER_IN] = "Expected a pattern expression after the `in` keyword",
[PM_ERR_PATTERN_EXPRESSION_AFTER_KEY] = "Expected a pattern expression after the key",
[PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN] = "Expected a pattern expression after the `(` operator",
[PM_ERR_PATTERN_EXPRESSION_AFTER_PIN] = "Expected a pattern expression after the `^` pin operator",
[PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE] = "Expected a pattern expression after the `|` operator",
[PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE] = "Expected a pattern expression after the range operator",
[PM_ERR_PATTERN_HASH_KEY] = "Expected a key in the hash pattern",
[PM_ERR_PATTERN_HASH_KEY_LABEL] = "Expected a label as the key in the hash pattern", // TODO // THIS // AND // ABOVE // IS WEIRD
[PM_ERR_PATTERN_IDENT_AFTER_HROCKET] = "Expected an identifier after the `=>` operator",
[PM_ERR_PATTERN_LABEL_AFTER_COMMA] = "Expected a label after the `,` in the hash pattern",
[PM_ERR_PATTERN_REST] = "Unexpected rest pattern",
[PM_ERR_PATTERN_TERM_BRACE] = "Expected a `}` to close the pattern expression",
[PM_ERR_PATTERN_TERM_BRACKET] = "Expected a `]` to close the pattern expression",
[PM_ERR_PATTERN_TERM_PAREN] = "Expected a `)` to close the pattern expression",
[PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN] = "Unexpected `||=` in a multiple assignment",
[PM_ERR_REGEXP_TERM] = "Expected a closing delimiter for the regular expression",
[PM_ERR_RESCUE_EXPRESSION] = "Expected a rescued expression",
[PM_ERR_RESCUE_MODIFIER_VALUE] = "Expected a value after the `rescue` modifier",
[PM_ERR_RESCUE_TERM] = "Expected a closing delimiter for the `rescue` clause",
[PM_ERR_RESCUE_VARIABLE] = "Expected an exception variable after `=>` in a rescue statement",
[PM_ERR_RETURN_INVALID] = "Invalid `return` in a class or module body",
[PM_ERR_STRING_CONCATENATION] = "Expected a string for concatenation",
[PM_ERR_STRING_INTERPOLATED_TERM] = "Expected a closing delimiter for the interpolated string",
[PM_ERR_STRING_LITERAL_TERM] = "Expected a closing delimiter for the string literal",
[PM_ERR_SYMBOL_INVALID] = "Invalid symbol", // TODO expected symbol? prism.c ~9719
[PM_ERR_SYMBOL_TERM_DYNAMIC] = "Expected a closing delimiter for the dynamic symbol",
[PM_ERR_SYMBOL_TERM_INTERPOLATED] = "Expected a closing delimiter for the interpolated symbol",
[PM_ERR_TERNARY_COLON] = "Expected a `:` after the true expression of a ternary operator",
[PM_ERR_TERNARY_EXPRESSION_FALSE] = "Expected an expression after `:` in the ternary operator",
[PM_ERR_TERNARY_EXPRESSION_TRUE] = "Expected an expression after `?` in the ternary operator",
[PM_ERR_UNDEF_ARGUMENT] = "Invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument",
[PM_ERR_UNARY_RECEIVER_BANG] = "Expected a receiver for unary `!`",
[PM_ERR_UNARY_RECEIVER_MINUS] = "Expected a receiver for unary `-`",
[PM_ERR_UNARY_RECEIVER_PLUS] = "Expected a receiver for unary `+`",
[PM_ERR_UNARY_RECEIVER_TILDE] = "Expected a receiver for unary `~`",
[PM_ERR_UNTIL_TERM] = "Expected an `end` to close the `until` statement",
[PM_ERR_WHILE_TERM] = "Expected an `end` to close the `while` statement",
[PM_ERR_WRITE_TARGET_READONLY] = "Immutable variable as a write target",
[PM_ERR_WRITE_TARGET_UNEXPECTED] = "Unexpected write target",
[PM_ERR_XSTRING_TERM] = "Expected a closing delimiter for the `%x` or backtick string",
[PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS] = "Ambiguous first argument; put parentheses or a space even after `-` operator",
[PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = "Ambiguous first argument; put parentheses or a space even after `+` operator",
[PM_WARN_AMBIGUOUS_PREFIX_STAR] = "Ambiguous `*` has been interpreted as an argument prefix",
[PM_WARN_AMBIGUOUS_SLASH] = "Ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator",
};
// Look up the message string for a diagnostic ID in the diagnostic_messages
// table. The ID is asserted to be below the *_DIAGNOSTIC_ID_LEN sentinel and the
// table entry is asserted to be non-NULL; both checks are plain assert() calls,
// so they do nothing in builds where assertions are compiled out.
// NOTE(review): this listing looks like a rename diff with its +/- markers
// stripped; each yp_/YP_ line appears to be the old form of the pm_/PM_ line
// that follows it.
static const char*
yp_diagnostic_message(yp_diagnostic_id_t diag_id) {
assert(diag_id < YP_DIAGNOSTIC_ID_LEN);
pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
assert(diag_id < PM_DIAGNOSTIC_ID_LEN);
const char *message = diagnostic_messages[diag_id];
assert(message);
return message;
@ -262,24 +262,24 @@ yp_diagnostic_message(yp_diagnostic_id_t diag_id) {
// Append a diagnostic to the given list of diagnostics. A new diagnostic is
// allocated with malloc, filled in with the start/end pointers and the message
// looked up from diag_id, and then appended to the list. Returns false without
// touching the list if the allocation fails, and true otherwise.
bool
yp_diagnostic_list_append(yp_list_t *list, const uint8_t *start, const uint8_t *end, yp_diagnostic_id_t diag_id) {
yp_diagnostic_t *diagnostic = (yp_diagnostic_t *) malloc(sizeof(yp_diagnostic_t));
pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) malloc(sizeof(pm_diagnostic_t));
if (diagnostic == NULL) return false;
// Members not named in the compound literal (the embedded list node) are
// zero-initialized by the assignment.
*diagnostic = (yp_diagnostic_t) { .start = start, .end = end, .message = yp_diagnostic_message(diag_id) };
yp_list_append(list, (yp_list_node_t *) diagnostic);
*diagnostic = (pm_diagnostic_t) { .start = start, .end = end, .message = pm_diagnostic_message(diag_id) };
pm_list_append(list, (pm_list_node_t *) diagnostic);
return true;
}
// Deallocate the internal state of the given diagnostic list. Every node
// reachable from list->head is freed; the list structure itself is not freed
// and its fields are not reset here.
void
yp_diagnostic_list_free(yp_list_t *list) {
yp_list_node_t *node, *next;
pm_diagnostic_list_free(pm_list_t *list) {
pm_list_node_t *node, *next;
for (node = list->head; node != NULL; node = next) {
// Read the successor before freeing so the loop never touches freed memory.
next = node->next;
yp_diagnostic_t *diagnostic = (yp_diagnostic_t *) node;
pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) node;
free(diagnostic);
}
}

Просмотреть файл

@ -1,8 +1,8 @@
#ifndef YARP_DIAGNOSTIC_H
#define YARP_DIAGNOSTIC_H
#ifndef PRISM_DIAGNOSTIC_H
#define PRISM_DIAGNOSTIC_H
#include "yarp/defines.h"
#include "yarp/util/yp_list.h"
#include "prism/defines.h"
#include "prism/util/pm_list.h"
#include <stdbool.h>
#include <stdlib.h>
@ -10,220 +10,220 @@
// This struct represents a diagnostic found during parsing.
typedef struct {
// The list node is the first member, which is what allows a diagnostic
// pointer to be cast to a list node pointer (and back) by the list code.
yp_list_node_t node;
pm_list_node_t node;
// Byte pointers marking where the diagnostic starts and ends
// (presumably into the source being parsed -- TODO confirm).
const uint8_t *start;
const uint8_t *end;
// The message text describing the diagnostic.
const char *message;
} yp_diagnostic_t;
} pm_diagnostic_t;
typedef enum {
YP_ERR_ALIAS_ARGUMENT,
YP_ERR_AMPAMPEQ_MULTI_ASSIGN,
YP_ERR_ARGUMENT_AFTER_BLOCK,
YP_ERR_ARGUMENT_BARE_HASH,
YP_ERR_ARGUMENT_BLOCK_MULTI,
YP_ERR_ARGUMENT_FORMAL_CLASS,
YP_ERR_ARGUMENT_FORMAL_CONSTANT,
YP_ERR_ARGUMENT_FORMAL_GLOBAL,
YP_ERR_ARGUMENT_FORMAL_IVAR,
YP_ERR_ARGUMENT_NO_FORWARDING_AMP,
YP_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
YP_ERR_ARGUMENT_NO_FORWARDING_STAR,
YP_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
YP_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
YP_ERR_ARGUMENT_TERM_PAREN,
YP_ERR_ARGUMENT_UNEXPECTED_BLOCK,
YP_ERR_ARRAY_ELEMENT,
YP_ERR_ARRAY_EXPRESSION,
YP_ERR_ARRAY_EXPRESSION_AFTER_STAR,
YP_ERR_ARRAY_SEPARATOR,
YP_ERR_ARRAY_TERM,
YP_ERR_BEGIN_LONELY_ELSE,
YP_ERR_BEGIN_TERM,
YP_ERR_BEGIN_UPCASE_BRACE,
YP_ERR_BEGIN_UPCASE_TERM,
YP_ERR_BEGIN_UPCASE_TOPLEVEL,
YP_ERR_BLOCK_PARAM_LOCAL_VARIABLE,
YP_ERR_BLOCK_PARAM_PIPE_TERM,
YP_ERR_BLOCK_TERM_BRACE,
YP_ERR_BLOCK_TERM_END,
YP_ERR_CANNOT_PARSE_EXPRESSION,
YP_ERR_CANNOT_PARSE_STRING_PART,
YP_ERR_CASE_EXPRESSION_AFTER_CASE,
YP_ERR_CASE_EXPRESSION_AFTER_WHEN,
YP_ERR_CASE_MISSING_CONDITIONS,
YP_ERR_CASE_TERM,
YP_ERR_CLASS_IN_METHOD,
YP_ERR_CLASS_NAME,
YP_ERR_CLASS_SUPERCLASS,
YP_ERR_CLASS_TERM,
YP_ERR_CONDITIONAL_ELSIF_PREDICATE,
YP_ERR_CONDITIONAL_IF_PREDICATE,
YP_ERR_CONDITIONAL_PREDICATE_TERM,
YP_ERR_CONDITIONAL_TERM,
YP_ERR_CONDITIONAL_TERM_ELSE,
YP_ERR_CONDITIONAL_UNLESS_PREDICATE,
YP_ERR_CONDITIONAL_UNTIL_PREDICATE,
YP_ERR_CONDITIONAL_WHILE_PREDICATE,
YP_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT,
YP_ERR_DEF_ENDLESS,
YP_ERR_DEF_ENDLESS_SETTER,
YP_ERR_DEF_NAME,
YP_ERR_DEF_NAME_AFTER_RECEIVER,
YP_ERR_DEF_PARAMS_TERM,
YP_ERR_DEF_PARAMS_TERM_PAREN,
YP_ERR_DEF_RECEIVER,
YP_ERR_DEF_RECEIVER_TERM,
YP_ERR_DEF_TERM,
YP_ERR_DEFINED_EXPRESSION,
YP_ERR_EMBDOC_TERM,
YP_ERR_EMBEXPR_END,
YP_ERR_EMBVAR_INVALID,
YP_ERR_END_UPCASE_BRACE,
YP_ERR_END_UPCASE_TERM,
YP_ERR_ESCAPE_INVALID_CONTROL,
YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT,
YP_ERR_ESCAPE_INVALID_HEXADECIMAL,
YP_ERR_ESCAPE_INVALID_META,
YP_ERR_ESCAPE_INVALID_META_REPEAT,
YP_ERR_ESCAPE_INVALID_UNICODE,
YP_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
YP_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
YP_ERR_ESCAPE_INVALID_UNICODE_LONG,
YP_ERR_ESCAPE_INVALID_UNICODE_TERM,
YP_ERR_EXPECT_ARGUMENT,
YP_ERR_EXPECT_EOL_AFTER_STATEMENT,
YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
YP_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
YP_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
YP_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
YP_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
YP_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
YP_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
YP_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
YP_ERR_EXPECT_EXPRESSION_AFTER_STAR,
YP_ERR_EXPECT_IDENT_REQ_PARAMETER,
YP_ERR_EXPECT_LPAREN_REQ_PARAMETER,
YP_ERR_EXPECT_RBRACKET,
YP_ERR_EXPECT_RPAREN,
YP_ERR_EXPECT_RPAREN_AFTER_MULTI,
YP_ERR_EXPECT_RPAREN_REQ_PARAMETER,
YP_ERR_EXPECT_STRING_CONTENT,
YP_ERR_EXPECT_WHEN_DELIMITER,
YP_ERR_EXPRESSION_BARE_HASH,
YP_ERR_FOR_COLLECTION,
YP_ERR_FOR_IN,
YP_ERR_FOR_INDEX,
YP_ERR_FOR_TERM,
YP_ERR_HASH_EXPRESSION_AFTER_LABEL,
YP_ERR_HASH_KEY,
YP_ERR_HASH_ROCKET,
YP_ERR_HASH_TERM,
YP_ERR_HASH_VALUE,
YP_ERR_HEREDOC_TERM,
YP_ERR_INCOMPLETE_QUESTION_MARK,
YP_ERR_INCOMPLETE_VARIABLE_CLASS,
YP_ERR_INCOMPLETE_VARIABLE_INSTANCE,
YP_ERR_INVALID_ENCODING_MAGIC_COMMENT,
YP_ERR_INVALID_FLOAT_EXPONENT,
YP_ERR_INVALID_NUMBER_BINARY,
YP_ERR_INVALID_NUMBER_DECIMAL,
YP_ERR_INVALID_NUMBER_HEXADECIMAL,
YP_ERR_INVALID_NUMBER_OCTAL,
YP_ERR_INVALID_NUMBER_UNDERSCORE,
YP_ERR_INVALID_PERCENT,
YP_ERR_INVALID_TOKEN,
YP_ERR_INVALID_VARIABLE_GLOBAL,
YP_ERR_LAMBDA_OPEN,
YP_ERR_LAMBDA_TERM_BRACE,
YP_ERR_LAMBDA_TERM_END,
YP_ERR_LIST_I_LOWER_ELEMENT,
YP_ERR_LIST_I_LOWER_TERM,
YP_ERR_LIST_I_UPPER_ELEMENT,
YP_ERR_LIST_I_UPPER_TERM,
YP_ERR_LIST_W_LOWER_ELEMENT,
YP_ERR_LIST_W_LOWER_TERM,
YP_ERR_LIST_W_UPPER_ELEMENT,
YP_ERR_LIST_W_UPPER_TERM,
YP_ERR_MALLOC_FAILED,
YP_ERR_MODULE_IN_METHOD,
YP_ERR_MODULE_NAME,
YP_ERR_MODULE_TERM,
YP_ERR_MULTI_ASSIGN_MULTI_SPLATS,
YP_ERR_NOT_EXPRESSION,
YP_ERR_NUMBER_LITERAL_UNDERSCORE,
YP_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
YP_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
YP_ERR_OPERATOR_MULTI_ASSIGN,
YP_ERR_OPERATOR_WRITE_BLOCK,
YP_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
YP_ERR_PARAMETER_BLOCK_MULTI,
YP_ERR_PARAMETER_METHOD_NAME,
YP_ERR_PARAMETER_NAME_REPEAT,
YP_ERR_PARAMETER_NO_DEFAULT,
YP_ERR_PARAMETER_NO_DEFAULT_KW,
YP_ERR_PARAMETER_NUMBERED_RESERVED,
YP_ERR_PARAMETER_ORDER,
YP_ERR_PARAMETER_SPLAT_MULTI,
YP_ERR_PARAMETER_STAR,
YP_ERR_PARAMETER_WILD_LOOSE_COMMA,
YP_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
YP_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
YP_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
YP_ERR_PATTERN_EXPRESSION_AFTER_IN,
YP_ERR_PATTERN_EXPRESSION_AFTER_KEY,
YP_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
YP_ERR_PATTERN_EXPRESSION_AFTER_PIN,
YP_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
YP_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
YP_ERR_PATTERN_HASH_KEY,
YP_ERR_PATTERN_HASH_KEY_LABEL,
YP_ERR_PATTERN_IDENT_AFTER_HROCKET,
YP_ERR_PATTERN_LABEL_AFTER_COMMA,
YP_ERR_PATTERN_REST,
YP_ERR_PATTERN_TERM_BRACE,
YP_ERR_PATTERN_TERM_BRACKET,
YP_ERR_PATTERN_TERM_PAREN,
YP_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
YP_ERR_REGEXP_TERM,
YP_ERR_RESCUE_EXPRESSION,
YP_ERR_RESCUE_MODIFIER_VALUE,
YP_ERR_RESCUE_TERM,
YP_ERR_RESCUE_VARIABLE,
YP_ERR_RETURN_INVALID,
YP_ERR_STRING_CONCATENATION,
YP_ERR_STRING_INTERPOLATED_TERM,
YP_ERR_STRING_LITERAL_TERM,
YP_ERR_SYMBOL_INVALID,
YP_ERR_SYMBOL_TERM_DYNAMIC,
YP_ERR_SYMBOL_TERM_INTERPOLATED,
YP_ERR_TERNARY_COLON,
YP_ERR_TERNARY_EXPRESSION_FALSE,
YP_ERR_TERNARY_EXPRESSION_TRUE,
YP_ERR_UNARY_RECEIVER_BANG,
YP_ERR_UNARY_RECEIVER_MINUS,
YP_ERR_UNARY_RECEIVER_PLUS,
YP_ERR_UNARY_RECEIVER_TILDE,
YP_ERR_UNDEF_ARGUMENT,
YP_ERR_UNTIL_TERM,
YP_ERR_WHILE_TERM,
YP_ERR_WRITE_TARGET_READONLY,
YP_ERR_WRITE_TARGET_UNEXPECTED,
YP_ERR_XSTRING_TERM,
YP_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
YP_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
YP_WARN_AMBIGUOUS_PREFIX_STAR,
YP_WARN_AMBIGUOUS_SLASH,
PM_ERR_ALIAS_ARGUMENT,
PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
PM_ERR_ARGUMENT_AFTER_BLOCK,
PM_ERR_ARGUMENT_BARE_HASH,
PM_ERR_ARGUMENT_BLOCK_MULTI,
PM_ERR_ARGUMENT_FORMAL_CLASS,
PM_ERR_ARGUMENT_FORMAL_CONSTANT,
PM_ERR_ARGUMENT_FORMAL_GLOBAL,
PM_ERR_ARGUMENT_FORMAL_IVAR,
PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
PM_ERR_ARGUMENT_TERM_PAREN,
PM_ERR_ARGUMENT_UNEXPECTED_BLOCK,
PM_ERR_ARRAY_ELEMENT,
PM_ERR_ARRAY_EXPRESSION,
PM_ERR_ARRAY_EXPRESSION_AFTER_STAR,
PM_ERR_ARRAY_SEPARATOR,
PM_ERR_ARRAY_TERM,
PM_ERR_BEGIN_LONELY_ELSE,
PM_ERR_BEGIN_TERM,
PM_ERR_BEGIN_UPCASE_BRACE,
PM_ERR_BEGIN_UPCASE_TERM,
PM_ERR_BEGIN_UPCASE_TOPLEVEL,
PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE,
PM_ERR_BLOCK_PARAM_PIPE_TERM,
PM_ERR_BLOCK_TERM_BRACE,
PM_ERR_BLOCK_TERM_END,
PM_ERR_CANNOT_PARSE_EXPRESSION,
PM_ERR_CANNOT_PARSE_STRING_PART,
PM_ERR_CASE_EXPRESSION_AFTER_CASE,
PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
PM_ERR_CASE_MISSING_CONDITIONS,
PM_ERR_CASE_TERM,
PM_ERR_CLASS_IN_METHOD,
PM_ERR_CLASS_NAME,
PM_ERR_CLASS_SUPERCLASS,
PM_ERR_CLASS_TERM,
PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
PM_ERR_CONDITIONAL_IF_PREDICATE,
PM_ERR_CONDITIONAL_PREDICATE_TERM,
PM_ERR_CONDITIONAL_TERM,
PM_ERR_CONDITIONAL_TERM_ELSE,
PM_ERR_CONDITIONAL_UNLESS_PREDICATE,
PM_ERR_CONDITIONAL_UNTIL_PREDICATE,
PM_ERR_CONDITIONAL_WHILE_PREDICATE,
PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT,
PM_ERR_DEF_ENDLESS,
PM_ERR_DEF_ENDLESS_SETTER,
PM_ERR_DEF_NAME,
PM_ERR_DEF_NAME_AFTER_RECEIVER,
PM_ERR_DEF_PARAMS_TERM,
PM_ERR_DEF_PARAMS_TERM_PAREN,
PM_ERR_DEF_RECEIVER,
PM_ERR_DEF_RECEIVER_TERM,
PM_ERR_DEF_TERM,
PM_ERR_DEFINED_EXPRESSION,
PM_ERR_EMBDOC_TERM,
PM_ERR_EMBEXPR_END,
PM_ERR_EMBVAR_INVALID,
PM_ERR_END_UPCASE_BRACE,
PM_ERR_END_UPCASE_TERM,
PM_ERR_ESCAPE_INVALID_CONTROL,
PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT,
PM_ERR_ESCAPE_INVALID_HEXADECIMAL,
PM_ERR_ESCAPE_INVALID_META,
PM_ERR_ESCAPE_INVALID_META_REPEAT,
PM_ERR_ESCAPE_INVALID_UNICODE,
PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
PM_ERR_EXPECT_ARGUMENT,
PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
PM_ERR_EXPECT_RBRACKET,
PM_ERR_EXPECT_RPAREN,
PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
PM_ERR_EXPECT_STRING_CONTENT,
PM_ERR_EXPECT_WHEN_DELIMITER,
PM_ERR_EXPRESSION_BARE_HASH,
PM_ERR_FOR_COLLECTION,
PM_ERR_FOR_IN,
PM_ERR_FOR_INDEX,
PM_ERR_FOR_TERM,
PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
PM_ERR_HASH_KEY,
PM_ERR_HASH_ROCKET,
PM_ERR_HASH_TERM,
PM_ERR_HASH_VALUE,
PM_ERR_HEREDOC_TERM,
PM_ERR_INCOMPLETE_QUESTION_MARK,
PM_ERR_INCOMPLETE_VARIABLE_CLASS,
PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
PM_ERR_INVALID_FLOAT_EXPONENT,
PM_ERR_INVALID_NUMBER_BINARY,
PM_ERR_INVALID_NUMBER_DECIMAL,
PM_ERR_INVALID_NUMBER_HEXADECIMAL,
PM_ERR_INVALID_NUMBER_OCTAL,
PM_ERR_INVALID_NUMBER_UNDERSCORE,
PM_ERR_INVALID_PERCENT,
PM_ERR_INVALID_TOKEN,
PM_ERR_INVALID_VARIABLE_GLOBAL,
PM_ERR_LAMBDA_OPEN,
PM_ERR_LAMBDA_TERM_BRACE,
PM_ERR_LAMBDA_TERM_END,
PM_ERR_LIST_I_LOWER_ELEMENT,
PM_ERR_LIST_I_LOWER_TERM,
PM_ERR_LIST_I_UPPER_ELEMENT,
PM_ERR_LIST_I_UPPER_TERM,
PM_ERR_LIST_W_LOWER_ELEMENT,
PM_ERR_LIST_W_LOWER_TERM,
PM_ERR_LIST_W_UPPER_ELEMENT,
PM_ERR_LIST_W_UPPER_TERM,
PM_ERR_MALLOC_FAILED,
PM_ERR_MODULE_IN_METHOD,
PM_ERR_MODULE_NAME,
PM_ERR_MODULE_TERM,
PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
PM_ERR_NOT_EXPRESSION,
PM_ERR_NUMBER_LITERAL_UNDERSCORE,
PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
PM_ERR_OPERATOR_MULTI_ASSIGN,
PM_ERR_OPERATOR_WRITE_BLOCK,
PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
PM_ERR_PARAMETER_BLOCK_MULTI,
PM_ERR_PARAMETER_METHOD_NAME,
PM_ERR_PARAMETER_NAME_REPEAT,
PM_ERR_PARAMETER_NO_DEFAULT,
PM_ERR_PARAMETER_NO_DEFAULT_KW,
PM_ERR_PARAMETER_NUMBERED_RESERVED,
PM_ERR_PARAMETER_ORDER,
PM_ERR_PARAMETER_SPLAT_MULTI,
PM_ERR_PARAMETER_STAR,
PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
PM_ERR_PATTERN_HASH_KEY,
PM_ERR_PATTERN_HASH_KEY_LABEL,
PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
PM_ERR_PATTERN_LABEL_AFTER_COMMA,
PM_ERR_PATTERN_REST,
PM_ERR_PATTERN_TERM_BRACE,
PM_ERR_PATTERN_TERM_BRACKET,
PM_ERR_PATTERN_TERM_PAREN,
PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
PM_ERR_REGEXP_TERM,
PM_ERR_RESCUE_EXPRESSION,
PM_ERR_RESCUE_MODIFIER_VALUE,
PM_ERR_RESCUE_TERM,
PM_ERR_RESCUE_VARIABLE,
PM_ERR_RETURN_INVALID,
PM_ERR_STRING_CONCATENATION,
PM_ERR_STRING_INTERPOLATED_TERM,
PM_ERR_STRING_LITERAL_TERM,
PM_ERR_SYMBOL_INVALID,
PM_ERR_SYMBOL_TERM_DYNAMIC,
PM_ERR_SYMBOL_TERM_INTERPOLATED,
PM_ERR_TERNARY_COLON,
PM_ERR_TERNARY_EXPRESSION_FALSE,
PM_ERR_TERNARY_EXPRESSION_TRUE,
PM_ERR_UNARY_RECEIVER_BANG,
PM_ERR_UNARY_RECEIVER_MINUS,
PM_ERR_UNARY_RECEIVER_PLUS,
PM_ERR_UNARY_RECEIVER_TILDE,
PM_ERR_UNDEF_ARGUMENT,
PM_ERR_UNTIL_TERM,
PM_ERR_WHILE_TERM,
PM_ERR_WRITE_TARGET_READONLY,
PM_ERR_WRITE_TARGET_UNEXPECTED,
PM_ERR_XSTRING_TERM,
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
PM_WARN_AMBIGUOUS_PREFIX_STAR,
PM_WARN_AMBIGUOUS_SLASH,
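/* This must be the last member: its value is the number of diagnostic IDs
   declared above, and it is used as the upper bound when an ID is validated. */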
YP_DIAGNOSTIC_ID_LEN,
} yp_diagnostic_id_t;
PM_DIAGNOSTIC_ID_LEN,
} pm_diagnostic_id_t;
// Append a diagnostic to the given list of diagnostics. The start and end
// pointers delimit the diagnostic and diag_id selects its message. Returns
// false if the diagnostic could not be allocated, true otherwise.
bool yp_diagnostic_list_append(yp_list_t *list, const uint8_t *start, const uint8_t *end, yp_diagnostic_id_t diag_id);
bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
// Deallocate the internal state of the given diagnostic list (the nodes that
// were appended to it).
void yp_diagnostic_list_free(yp_list_t *list);
void pm_diagnostic_list_free(pm_list_t *list);
#endif

Просмотреть файл

@ -1,7 +1,7 @@
#include "yarp/enc/yp_encoding.h"
#include "prism/enc/pm_encoding.h"
static size_t
yp_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80) {
return 1;
@ -16,37 +16,37 @@ yp_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
}
// Alphabetic check for Big5. When the Big5 width helper reports a one-byte
// character, the result of the shared ASCII alpha check is returned; for any
// other width this returns 0.
static size_t
yp_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_big5_char_width(b, n) == 1) {
return yp_encoding_ascii_alpha_char(b, n);
pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_char_width(b, n) == 1) {
return pm_encoding_ascii_alpha_char(b, n);
} else {
return 0;
}
}
// Alphanumeric check for Big5. When the Big5 width helper reports a one-byte
// character, the result of the shared ASCII alnum check is returned; for any
// other width this returns 0.
static size_t
yp_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_big5_char_width(b, n) == 1) {
return yp_encoding_ascii_alnum_char(b, n);
pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_char_width(b, n) == 1) {
return pm_encoding_ascii_alnum_char(b, n);
} else {
return 0;
}
}
// Uppercase check for Big5. When the Big5 width helper reports a one-byte
// character, the result of the shared ASCII isupper check is returned; for any
// other width this returns false.
static bool
yp_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_big5_char_width(b, n) == 1) {
return yp_encoding_ascii_isupper_char(b, n);
pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_big5_char_width(b, n) == 1) {
return pm_encoding_ascii_isupper_char(b, n);
} else {
return false;
}
}
// Encoding descriptor for Big5: sets the encoding name, wires up the four
// character callbacks defined in this file, and marks the encoding as
// multibyte.
yp_encoding_t yp_encoding_big5 = {
pm_encoding_t pm_encoding_big5 = {
.name = "big5",
.char_width = yp_encoding_big5_char_width,
.alnum_char = yp_encoding_big5_alnum_char,
.alpha_char = yp_encoding_big5_alpha_char,
.isupper_char = yp_encoding_big5_isupper_char,
.char_width = pm_encoding_big5_char_width,
.alnum_char = pm_encoding_big5_alnum_char,
.alpha_char = pm_encoding_big5_alpha_char,
.isupper_char = pm_encoding_big5_isupper_char,
.multibyte = true
};

Просмотреть файл

@ -1,7 +1,7 @@
#ifndef YARP_ENCODING_H
#define YARP_ENCODING_H
#ifndef PRISM_ENCODING_H
#define PRISM_ENCODING_H
#include "yarp/defines.h"
#include "prism/defines.h"
#include <assert.h>
#include <stdbool.h>
@ -39,57 +39,57 @@ typedef struct {
// Return true if the encoding is a multibyte encoding.
bool multibyte;
} yp_encoding_t;
} pm_encoding_t;
// These bits define the location of each bit of metadata within the various
// lookup tables that are used to determine the properties of a character.
// Each replacement list is parenthesized so the mask stays a single operand
// when it is used next to operators that bind tighter than <<, for example
// ~PRISM_ENCODING_UPPERCASE_BIT or flags + PRISM_ENCODING_ALPHANUMERIC_BIT.
#define YP_ENCODING_ALPHABETIC_BIT (1 << 0)
#define YP_ENCODING_ALPHANUMERIC_BIT (1 << 1)
#define YP_ENCODING_UPPERCASE_BIT (1 << 2)
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
// These functions are reused by some other encodings, so they are declared here
// so they can be shared. Each takes a pointer to the byte(s) to examine and a
// length n; n carries the *_ATTRIBUTE_UNUSED annotation in these declarations.
size_t yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
size_t yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
bool yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
size_t pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
size_t pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
bool pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
// These functions are shared between the actual encoding and the fast path in
// the parser so they need to be internally visible. Both take a byte pointer b
// and a length n and return a size_t.
size_t yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
size_t yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
// This lookup table is referenced in both the UTF-8 encoding file and the
// parser directly in order to speed up the default encoding processing. It has
// 256 entries of uint8_t (presumably one per byte value, holding the
// *_ENCODING_*_BIT flags -- TODO confirm against the table definition).
extern const uint8_t yp_encoding_unicode_table[256];
extern const uint8_t pm_encoding_unicode_table[256];
// These are the encodings that are supported by the parser. They are defined in
// their own files in the src/enc directory. Each one is an encoding descriptor
// struct; only the declarations live here.
extern yp_encoding_t yp_encoding_ascii;
extern yp_encoding_t yp_encoding_ascii_8bit;
extern yp_encoding_t yp_encoding_big5;
extern yp_encoding_t yp_encoding_euc_jp;
extern yp_encoding_t yp_encoding_gbk;
extern yp_encoding_t yp_encoding_iso_8859_1;
extern yp_encoding_t yp_encoding_iso_8859_2;
extern yp_encoding_t yp_encoding_iso_8859_3;
extern yp_encoding_t yp_encoding_iso_8859_4;
extern yp_encoding_t yp_encoding_iso_8859_5;
extern yp_encoding_t yp_encoding_iso_8859_6;
extern yp_encoding_t yp_encoding_iso_8859_7;
extern yp_encoding_t yp_encoding_iso_8859_8;
extern yp_encoding_t yp_encoding_iso_8859_9;
extern yp_encoding_t yp_encoding_iso_8859_10;
extern yp_encoding_t yp_encoding_iso_8859_11;
extern yp_encoding_t yp_encoding_iso_8859_13;
extern yp_encoding_t yp_encoding_iso_8859_14;
extern yp_encoding_t yp_encoding_iso_8859_15;
extern yp_encoding_t yp_encoding_iso_8859_16;
extern yp_encoding_t yp_encoding_koi8_r;
extern yp_encoding_t yp_encoding_shift_jis;
extern yp_encoding_t yp_encoding_utf_8;
extern yp_encoding_t yp_encoding_utf8_mac;
extern yp_encoding_t yp_encoding_windows_31j;
extern yp_encoding_t yp_encoding_windows_1251;
extern yp_encoding_t yp_encoding_windows_1252;
extern pm_encoding_t pm_encoding_ascii;
extern pm_encoding_t pm_encoding_ascii_8bit;
extern pm_encoding_t pm_encoding_big5;
extern pm_encoding_t pm_encoding_euc_jp;
extern pm_encoding_t pm_encoding_gbk;
extern pm_encoding_t pm_encoding_iso_8859_1;
extern pm_encoding_t pm_encoding_iso_8859_2;
extern pm_encoding_t pm_encoding_iso_8859_3;
extern pm_encoding_t pm_encoding_iso_8859_4;
extern pm_encoding_t pm_encoding_iso_8859_5;
extern pm_encoding_t pm_encoding_iso_8859_6;
extern pm_encoding_t pm_encoding_iso_8859_7;
extern pm_encoding_t pm_encoding_iso_8859_8;
extern pm_encoding_t pm_encoding_iso_8859_9;
extern pm_encoding_t pm_encoding_iso_8859_10;
extern pm_encoding_t pm_encoding_iso_8859_11;
extern pm_encoding_t pm_encoding_iso_8859_13;
extern pm_encoding_t pm_encoding_iso_8859_14;
extern pm_encoding_t pm_encoding_iso_8859_15;
extern pm_encoding_t pm_encoding_iso_8859_16;
extern pm_encoding_t pm_encoding_koi8_r;
extern pm_encoding_t pm_encoding_shift_jis;
extern pm_encoding_t pm_encoding_utf_8;
extern pm_encoding_t pm_encoding_utf8_mac;
extern pm_encoding_t pm_encoding_windows_31j;
extern pm_encoding_t pm_encoding_windows_1251;
extern pm_encoding_t pm_encoding_windows_1252;
#endif

Просмотреть файл

@ -1,7 +1,7 @@
#include "yarp/enc/yp_encoding.h"
#include "prism/enc/pm_encoding.h"
static size_t
yp_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80) {
return 1;
@ -22,37 +22,37 @@ yp_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
}
// Alphabetic check for EUC-JP. When the EUC-JP width helper reports a one-byte
// character, the result of the shared ASCII alpha check is returned; for any
// other width this returns 0.
static size_t
yp_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_euc_jp_char_width(b, n) == 1) {
return yp_encoding_ascii_alpha_char(b, n);
pm_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_euc_jp_char_width(b, n) == 1) {
return pm_encoding_ascii_alpha_char(b, n);
} else {
return 0;
}
}
// Alphanumeric check for EUC-JP. When the EUC-JP width helper reports a
// one-byte character, the result of the shared ASCII alnum check is returned;
// for any other width this returns 0.
static size_t
yp_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_euc_jp_char_width(b, n) == 1) {
return yp_encoding_ascii_alnum_char(b, n);
pm_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_euc_jp_char_width(b, n) == 1) {
return pm_encoding_ascii_alnum_char(b, n);
} else {
return 0;
}
}
// Uppercase check for EUC-JP. When the EUC-JP width helper reports a one-byte
// character, the result of the shared ASCII isupper check is returned; for any
// other width this returns false. The fallback is spelled `false` (not the
// integer literal 0) because the function returns bool, matching the big5 and
// gbk variants.
static bool
yp_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_euc_jp_char_width(b, n) == 1) {
return yp_encoding_ascii_isupper_char(b, n);
pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_euc_jp_char_width(b, n) == 1) {
return pm_encoding_ascii_isupper_char(b, n);
} else {
return false;
}
}
// Encoding descriptor for EUC-JP: sets the encoding name, wires up the four
// character callbacks defined in this file, and marks the encoding as
// multibyte.
yp_encoding_t yp_encoding_euc_jp = {
pm_encoding_t pm_encoding_euc_jp = {
.name = "euc-jp",
.char_width = yp_encoding_euc_jp_char_width,
.alnum_char = yp_encoding_euc_jp_alnum_char,
.alpha_char = yp_encoding_euc_jp_alpha_char,
.isupper_char = yp_encoding_euc_jp_isupper_char,
.char_width = pm_encoding_euc_jp_char_width,
.alnum_char = pm_encoding_euc_jp_alnum_char,
.alpha_char = pm_encoding_euc_jp_alpha_char,
.isupper_char = pm_encoding_euc_jp_isupper_char,
.multibyte = true
};

Просмотреть файл

@ -1,7 +1,7 @@
#include "yarp/enc/yp_encoding.h"
#include "prism/enc/pm_encoding.h"
static size_t
yp_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80) {
return 1;
@ -25,37 +25,37 @@ yp_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
}
// Alphabetic check for GBK. When the GBK width helper reports a one-byte
// character, the result of the shared ASCII alpha check is returned; for any
// other width this returns 0.
static size_t
yp_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_gbk_char_width(b, n) == 1) {
return yp_encoding_ascii_alpha_char(b, n);
pm_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_gbk_char_width(b, n) == 1) {
return pm_encoding_ascii_alpha_char(b, n);
} else {
return 0;
}
}
// Alphanumeric check for GBK. When the GBK width helper reports a one-byte
// character, the result of the shared ASCII alnum check is returned; for any
// other width this returns 0.
static size_t
yp_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_gbk_char_width(b, n) == 1) {
return yp_encoding_ascii_alnum_char(b, n);
pm_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_gbk_char_width(b, n) == 1) {
return pm_encoding_ascii_alnum_char(b, n);
} else {
return 0;
}
}
// Uppercase check for GBK. When the GBK width helper reports a one-byte
// character, the result of the shared ASCII isupper check is returned; for any
// other width this returns false.
static bool
yp_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_gbk_char_width(b, n) == 1) {
return yp_encoding_ascii_isupper_char(b, n);
pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_gbk_char_width(b, n) == 1) {
return pm_encoding_ascii_isupper_char(b, n);
} else {
return false;
}
}
// Encoding descriptor for GBK: sets the encoding name, wires up the four
// character callbacks defined in this file, and marks the encoding as
// multibyte.
yp_encoding_t yp_encoding_gbk = {
pm_encoding_t pm_encoding_gbk = {
.name = "gbk",
.char_width = yp_encoding_gbk_char_width,
.alnum_char = yp_encoding_gbk_alnum_char,
.alpha_char = yp_encoding_gbk_alpha_char,
.isupper_char = yp_encoding_gbk_isupper_char,
.char_width = pm_encoding_gbk_char_width,
.alnum_char = pm_encoding_gbk_alnum_char,
.alpha_char = pm_encoding_gbk_alpha_char,
.isupper_char = pm_encoding_gbk_isupper_char,
.multibyte = true
};

Просмотреть файл

@ -1,7 +1,7 @@
#include "yarp/enc/yp_encoding.h"
#include "prism/enc/pm_encoding.h"
static size_t
yp_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
return 1;
@ -20,37 +20,37 @@ yp_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
}
// Alphabetic check for Shift_JIS. When the Shift_JIS width helper reports a
// one-byte character, the result of the shared ASCII alpha check is returned;
// for any other width this returns 0.
static size_t
yp_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_shift_jis_char_width(b, n) == 1) {
return yp_encoding_ascii_alpha_char(b, n);
pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_shift_jis_char_width(b, n) == 1) {
return pm_encoding_ascii_alpha_char(b, n);
} else {
return 0;
}
}
static size_t
yp_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_shift_jis_char_width(b, n) == 1) {
return yp_encoding_ascii_alnum_char(b, n);
pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_shift_jis_char_width(b, n) == 1) {
return pm_encoding_ascii_alnum_char(b, n);
} else {
return 0;
}
}
static bool
yp_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_shift_jis_char_width(b, n) == 1) {
return yp_encoding_ascii_isupper_char(b, n);
pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_shift_jis_char_width(b, n) == 1) {
return pm_encoding_ascii_isupper_char(b, n);
} else {
return 0;
}
}
yp_encoding_t yp_encoding_shift_jis = {
pm_encoding_t pm_encoding_shift_jis = {
.name = "shift_jis",
.char_width = yp_encoding_shift_jis_char_width,
.alnum_char = yp_encoding_shift_jis_alnum_char,
.alpha_char = yp_encoding_shift_jis_alpha_char,
.isupper_char = yp_encoding_shift_jis_isupper_char,
.char_width = pm_encoding_shift_jis_char_width,
.alnum_char = pm_encoding_shift_jis_alnum_char,
.alpha_char = pm_encoding_shift_jis_alpha_char,
.isupper_char = pm_encoding_shift_jis_isupper_char,
.multibyte = true
};

Просмотреть файл

@ -1,8 +1,8 @@
#include "yarp/enc/yp_encoding.h"
#include "prism/enc/pm_encoding.h"
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ASCII character.
static uint8_t yp_encoding_ascii_table[256] = {
static uint8_t pm_encoding_ascii_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -24,7 +24,7 @@ static uint8_t yp_encoding_ascii_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-1 character.
static uint8_t yp_encoding_iso_8859_1_table[256] = {
static uint8_t pm_encoding_iso_8859_1_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -46,7 +46,7 @@ static uint8_t yp_encoding_iso_8859_1_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-2 character.
static uint8_t yp_encoding_iso_8859_2_table[256] = {
static uint8_t pm_encoding_iso_8859_2_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -68,7 +68,7 @@ static uint8_t yp_encoding_iso_8859_2_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-3 character.
static uint8_t yp_encoding_iso_8859_3_table[256] = {
static uint8_t pm_encoding_iso_8859_3_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -90,7 +90,7 @@ static uint8_t yp_encoding_iso_8859_3_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-4 character.
static uint8_t yp_encoding_iso_8859_4_table[256] = {
static uint8_t pm_encoding_iso_8859_4_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -112,7 +112,7 @@ static uint8_t yp_encoding_iso_8859_4_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-5 character.
static uint8_t yp_encoding_iso_8859_5_table[256] = {
static uint8_t pm_encoding_iso_8859_5_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -134,7 +134,7 @@ static uint8_t yp_encoding_iso_8859_5_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-6 character.
static uint8_t yp_encoding_iso_8859_6_table[256] = {
static uint8_t pm_encoding_iso_8859_6_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -156,7 +156,7 @@ static uint8_t yp_encoding_iso_8859_6_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-7 character.
static uint8_t yp_encoding_iso_8859_7_table[256] = {
static uint8_t pm_encoding_iso_8859_7_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -178,7 +178,7 @@ static uint8_t yp_encoding_iso_8859_7_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-8 character.
static uint8_t yp_encoding_iso_8859_8_table[256] = {
static uint8_t pm_encoding_iso_8859_8_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -200,7 +200,7 @@ static uint8_t yp_encoding_iso_8859_8_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-9 character.
static uint8_t yp_encoding_iso_8859_9_table[256] = {
static uint8_t pm_encoding_iso_8859_9_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -222,7 +222,7 @@ static uint8_t yp_encoding_iso_8859_9_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-10 character.
static uint8_t yp_encoding_iso_8859_10_table[256] = {
static uint8_t pm_encoding_iso_8859_10_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -244,7 +244,7 @@ static uint8_t yp_encoding_iso_8859_10_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-11 character.
static uint8_t yp_encoding_iso_8859_11_table[256] = {
static uint8_t pm_encoding_iso_8859_11_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -266,7 +266,7 @@ static uint8_t yp_encoding_iso_8859_11_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-13 character.
static uint8_t yp_encoding_iso_8859_13_table[256] = {
static uint8_t pm_encoding_iso_8859_13_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -288,7 +288,7 @@ static uint8_t yp_encoding_iso_8859_13_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-14 character.
static uint8_t yp_encoding_iso_8859_14_table[256] = {
static uint8_t pm_encoding_iso_8859_14_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -310,7 +310,7 @@ static uint8_t yp_encoding_iso_8859_14_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-15 character.
static uint8_t yp_encoding_iso_8859_15_table[256] = {
static uint8_t pm_encoding_iso_8859_15_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -332,7 +332,7 @@ static uint8_t yp_encoding_iso_8859_15_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding ISO-8859-16 character.
static uint8_t yp_encoding_iso_8859_16_table[256] = {
static uint8_t pm_encoding_iso_8859_16_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -354,7 +354,7 @@ static uint8_t yp_encoding_iso_8859_16_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding KOI8-R character.
static uint8_t yp_encoding_koi8_r_table[256] = {
static uint8_t pm_encoding_koi8_r_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -376,7 +376,7 @@ static uint8_t yp_encoding_koi8_r_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding windows-1251 character.
static uint8_t yp_encoding_windows_1251_table[256] = {
static uint8_t pm_encoding_windows_1251_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -398,7 +398,7 @@ static uint8_t yp_encoding_windows_1251_table[256] = {
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding windows-1252 character.
static uint8_t yp_encoding_windows_1252_table[256] = {
static uint8_t pm_encoding_windows_1252_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -419,89 +419,89 @@ static uint8_t yp_encoding_windows_1252_table[256] = {
};
static size_t
yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return *b < 0x80 ? 1 : 0;
}
size_t
yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
}
size_t
yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
}
bool
yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
}
static size_t
yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
}
static size_t
yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return 1;
}
yp_encoding_t yp_encoding_ascii = {
pm_encoding_t pm_encoding_ascii = {
.name = "ascii",
.char_width = yp_encoding_ascii_char_width,
.alnum_char = yp_encoding_ascii_alnum_char,
.alpha_char = yp_encoding_ascii_alpha_char,
.isupper_char = yp_encoding_ascii_isupper_char,
.char_width = pm_encoding_ascii_char_width,
.alnum_char = pm_encoding_ascii_alnum_char,
.alpha_char = pm_encoding_ascii_alpha_char,
.isupper_char = pm_encoding_ascii_isupper_char,
.multibyte = false
};
yp_encoding_t yp_encoding_ascii_8bit = {
pm_encoding_t pm_encoding_ascii_8bit = {
.name = "ascii-8bit",
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_ascii_alnum_char,
.alpha_char = yp_encoding_ascii_alpha_char,
.isupper_char = yp_encoding_ascii_isupper_char,
.char_width = pm_encoding_single_char_width,
.alnum_char = pm_encoding_ascii_alnum_char,
.alpha_char = pm_encoding_ascii_alpha_char,
.isupper_char = pm_encoding_ascii_isupper_char,
.multibyte = false
};
#define YP_ENCODING_TABLE(s, i, w) \
static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT); \
#define PRISM_ENCODING_TABLE(s, i, w) \
static size_t pm_encoding_ ##i ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
} \
static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
static size_t pm_encoding_ ##i ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
} \
static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT); \
static bool pm_encoding_ ##i ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
} \
yp_encoding_t yp_encoding_ ##i = { \
pm_encoding_t pm_encoding_ ##i = { \
.name = s, \
.char_width = w, \
.alnum_char = yp_encoding_ ##i ## _alnum_char, \
.alpha_char = yp_encoding_ ##i ## _alpha_char, \
.isupper_char = yp_encoding_ ##i ## _isupper_char, \
.alnum_char = pm_encoding_ ##i ## _alnum_char, \
.alpha_char = pm_encoding_ ##i ## _alpha_char, \
.isupper_char = pm_encoding_ ##i ## _isupper_char, \
.multibyte = false, \
};
YP_ENCODING_TABLE("iso-8859-1", iso_8859_1, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-2", iso_8859_2, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-3", iso_8859_3, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-4", iso_8859_4, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-5", iso_8859_5, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-6", iso_8859_6, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-7", iso_8859_7, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-8", iso_8859_8, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-9", iso_8859_9, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-10", iso_8859_10, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-11", iso_8859_11, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-13", iso_8859_13, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-14", iso_8859_14, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-15", iso_8859_15, yp_encoding_single_char_width)
YP_ENCODING_TABLE("iso-8859-16", iso_8859_16, yp_encoding_single_char_width)
YP_ENCODING_TABLE("koi8-r", koi8_r, yp_encoding_koi8_r_char_width)
YP_ENCODING_TABLE("windows-1251", windows_1251, yp_encoding_single_char_width)
YP_ENCODING_TABLE("windows-1252", windows_1252, yp_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-1", iso_8859_1, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-2", iso_8859_2, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-3", iso_8859_3, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-4", iso_8859_4, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-5", iso_8859_5, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-6", iso_8859_6, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-7", iso_8859_7, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-8", iso_8859_8, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-9", iso_8859_9, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-10", iso_8859_10, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-11", iso_8859_11, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-13", iso_8859_13, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-14", iso_8859_14, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-15", iso_8859_15, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("iso-8859-16", iso_8859_16, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("koi8-r", koi8_r, pm_encoding_koi8_r_char_width)
PRISM_ENCODING_TABLE("windows-1251", windows_1251, pm_encoding_single_char_width)
PRISM_ENCODING_TABLE("windows-1252", windows_1252, pm_encoding_single_char_width)
#undef YP_ENCODING_TABLE
#undef PRISM_ENCODING_TABLE

Просмотреть файл

@ -1,16 +1,16 @@
// Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
// decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
#include "yarp/enc/yp_encoding.h"
#include "prism/enc/pm_encoding.h"
typedef uint32_t yp_unicode_codepoint_t;
typedef uint32_t pm_unicode_codepoint_t;
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding unicode codepoint. Note that
// this table is different from other encodings where we used a lookup table
// because the indices of those tables are the byte representations, not the
// codepoints themselves.
const uint8_t yp_encoding_unicode_table[256] = {
const uint8_t pm_encoding_unicode_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -31,7 +31,7 @@ const uint8_t yp_encoding_unicode_table[256] = {
};
#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
static const yp_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
0x100, 0x2C1,
0x2C6, 0x2D1,
0x2E0, 0x2E4,
@ -760,7 +760,7 @@ static const yp_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
};
#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528
static const yp_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
0x100, 0x2C1,
0x2C6, 0x2D1,
0x2E0, 0x2E4,
@ -1528,7 +1528,7 @@ static const yp_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
};
#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1296
static const yp_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
0x100, 0x100,
0x102, 0x102,
0x104, 0x104,
@ -2180,7 +2180,7 @@ static const yp_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
};
static bool
yp_unicode_codepoint_match(yp_unicode_codepoint_t codepoint, const yp_unicode_codepoint_t *codepoints, size_t size) {
pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
size_t start = 0;
size_t end = size;
@ -2202,7 +2202,7 @@ yp_unicode_codepoint_match(yp_unicode_codepoint_t codepoint, const yp_unicode_co
return false;
}
static const uint8_t yp_utf_8_dfa[] = {
static const uint8_t pm_utf_8_dfa[] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
@ -2219,8 +2219,8 @@ static const uint8_t yp_utf_8_dfa[] = {
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
};
static yp_unicode_codepoint_t
yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
static pm_unicode_codepoint_t
pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
assert(n >= 1);
size_t maximum = (size_t) n;
@ -2229,16 +2229,16 @@ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
for (size_t index = 0; index < 4 && index < maximum; index++) {
uint32_t byte = b[index];
uint32_t type = yp_utf_8_dfa[byte];
uint32_t type = pm_utf_8_dfa[byte];
codepoint = (state != 0) ?
(byte & 0x3fu) | (codepoint << 6) :
(0xffu >> type) & (byte);
state = yp_utf_8_dfa[256 + (state * 16) + type];
state = pm_utf_8_dfa[256 + (state * 16) + type];
if (!state) {
*width = index + 1;
return (yp_unicode_codepoint_t) codepoint;
return (pm_unicode_codepoint_t) codepoint;
}
}
@ -2247,57 +2247,57 @@ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
}
static size_t
yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
size_t width;
yp_utf_8_codepoint(b, n, &width);
pm_utf_8_codepoint(b, n, &width);
return width;
}
size_t
yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (*b < 0x80) {
return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
}
size_t width;
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
if (codepoint <= 0xFF) {
return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) ? width : 0;
} else {
return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
return pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
}
}
size_t
yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (*b < 0x80) {
return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
size_t width;
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
if (codepoint <= 0xFF) {
return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
return (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
} else {
return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
return pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
}
}
static bool
yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (*b < 0x80) {
return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
}
size_t width;
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
if (codepoint <= 0xFF) {
return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
} else {
return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
}
}
@ -2305,20 +2305,20 @@ yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
#undef UNICODE_ALNUM_CODEPOINTS_LENGTH
#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
yp_encoding_t yp_encoding_utf_8 = {
pm_encoding_t pm_encoding_utf_8 = {
.name = "utf-8",
.char_width = yp_encoding_utf_8_char_width,
.alnum_char = yp_encoding_utf_8_alnum_char,
.alpha_char = yp_encoding_utf_8_alpha_char,
.isupper_char = yp_encoding_utf_8_isupper_char,
.char_width = pm_encoding_utf_8_char_width,
.alnum_char = pm_encoding_utf_8_alnum_char,
.alpha_char = pm_encoding_utf_8_alpha_char,
.isupper_char = pm_encoding_utf_8_isupper_char,
.multibyte = true
};
yp_encoding_t yp_encoding_utf8_mac = {
pm_encoding_t pm_encoding_utf8_mac = {
.name = "utf8-mac",
.char_width = yp_encoding_utf_8_char_width,
.alnum_char = yp_encoding_utf_8_alnum_char,
.alpha_char = yp_encoding_utf_8_alpha_char,
.isupper_char = yp_encoding_utf_8_isupper_char,
.char_width = pm_encoding_utf_8_char_width,
.alnum_char = pm_encoding_utf_8_alnum_char,
.alpha_char = pm_encoding_utf_8_alpha_char,
.isupper_char = pm_encoding_utf_8_isupper_char,
.multibyte = true
};

Просмотреть файл

@ -1,7 +1,7 @@
#include "yarp/enc/yp_encoding.h"
#include "prism/enc/pm_encoding.h"
static size_t
yp_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
pm_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
return 1;
@ -20,37 +20,37 @@ yp_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
}
static size_t
yp_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
return yp_encoding_ascii_alpha_char(b, n);
pm_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_windows_31j_char_width(b, n) == 1) {
return pm_encoding_ascii_alpha_char(b, n);
} else {
return 0;
}
}
static size_t
yp_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
return yp_encoding_ascii_alnum_char(b, n);
pm_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_windows_31j_char_width(b, n) == 1) {
return pm_encoding_ascii_alnum_char(b, n);
} else {
return 0;
}
}
static bool
yp_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
return yp_encoding_ascii_isupper_char(b, n);
pm_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (pm_encoding_windows_31j_char_width(b, n) == 1) {
return pm_encoding_ascii_isupper_char(b, n);
} else {
return false;
}
}
yp_encoding_t yp_encoding_windows_31j = {
pm_encoding_t pm_encoding_windows_31j = {
.name = "windows-31j",
.char_width = yp_encoding_windows_31j_char_width,
.alnum_char = yp_encoding_windows_31j_alnum_char,
.alpha_char = yp_encoding_windows_31j_alpha_char,
.isupper_char = yp_encoding_windows_31j_isupper_char,
.char_width = pm_encoding_windows_31j_char_width,
.alnum_char = pm_encoding_windows_31j_alnum_char,
.alpha_char = pm_encoding_windows_31j_alpha_char,
.isupper_char = pm_encoding_windows_31j_isupper_char,
.multibyte = true
};

Просмотреть файл

@ -1,18 +1,18 @@
#include "yarp/extension.h"
#include "prism/extension.h"
// NOTE: this file should contain only bindings.
// All non-trivial logic should be in librubyparser so it can be shared by its various callers.
VALUE rb_cYARP;
VALUE rb_cYARPNode;
VALUE rb_cYARPSource;
VALUE rb_cYARPToken;
VALUE rb_cYARPLocation;
VALUE rb_cPrism;
VALUE rb_cPrismNode;
VALUE rb_cPrismSource;
VALUE rb_cPrismToken;
VALUE rb_cPrismLocation;
VALUE rb_cYARPComment;
VALUE rb_cYARPParseError;
VALUE rb_cYARPParseWarning;
VALUE rb_cYARPParseResult;
VALUE rb_cPrismComment;
VALUE rb_cPrismParseError;
VALUE rb_cPrismParseWarning;
VALUE rb_cPrismParseResult;
/******************************************************************************/
/* IO of Ruby code */
@ -37,15 +37,15 @@ check_string(VALUE value) {
return RSTRING_PTR(value);
}
// Load the contents and size of the given string into the given yp_string_t.
// Load the contents and size of the given string into the given pm_string_t.
static void
input_load_string(yp_string_t *input, VALUE string) {
input_load_string(pm_string_t *input, VALUE string) {
// Check that the given value is a String. If it's not, then raise a type error.
if (!RB_TYPE_P(string, T_STRING)) {
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
}
yp_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
}
/******************************************************************************/
@ -54,22 +54,22 @@ input_load_string(yp_string_t *input, VALUE string) {
// Dump the AST corresponding to the given input to a string.
static VALUE
dump_input(yp_string_t *input, const char *filepath) {
yp_buffer_t buffer;
if (!yp_buffer_init(&buffer)) {
dump_input(pm_string_t *input, const char *filepath) {
pm_buffer_t buffer;
if (!pm_buffer_init(&buffer)) {
rb_raise(rb_eNoMemError, "failed to allocate memory");
}
yp_parser_t parser;
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
yp_node_t *node = yp_parse(&parser);
yp_serialize(&parser, node, &buffer);
pm_node_t *node = pm_parse(&parser);
pm_serialize(&parser, node, &buffer);
VALUE result = rb_str_new(yp_buffer_value(&buffer), yp_buffer_length(&buffer));
yp_node_destroy(&parser, node);
yp_buffer_free(&buffer);
yp_parser_free(&parser);
VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
pm_node_destroy(&parser, node);
pm_buffer_free(&buffer);
pm_parser_free(&parser);
return result;
}
@ -81,19 +81,19 @@ dump(int argc, VALUE *argv, VALUE self) {
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
yp_string_t input;
pm_string_t input;
input_load_string(&input, string);
#ifdef YARP_DEBUG_MODE_BUILD
size_t length = yp_string_length(&input);
#ifdef PRISM_DEBUG_MODE_BUILD
size_t length = pm_string_length(&input);
char* dup = malloc(length);
memcpy(dup, yp_string_source(&input), length);
yp_string_constant_init(&input, dup, length);
memcpy(dup, pm_string_source(&input), length);
pm_string_constant_init(&input, dup, length);
#endif
VALUE value = dump_input(&input, check_string(filepath));
#ifdef YARP_DEBUG_MODE_BUILD
#ifdef PRISM_DEBUG_MODE_BUILD
free(dup);
#endif
@ -103,13 +103,13 @@ dump(int argc, VALUE *argv, VALUE self) {
// Dump the AST corresponding to the given file to a string.
static VALUE
dump_file(VALUE self, VALUE filepath) {
yp_string_t input;
pm_string_t input;
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;
if (!pm_string_mapped_init(&input, checked)) return Qnil;
VALUE value = dump_input(&input, checked);
yp_string_free(&input);
pm_string_free(&input);
return value;
}
@ -120,10 +120,10 @@ dump_file(VALUE self, VALUE filepath) {
// Extract the comments out of the parser into an array.
static VALUE
parser_comments(yp_parser_t *parser, VALUE source) {
parser_comments(pm_parser_t *parser, VALUE source) {
VALUE comments = rb_ary_new();
for (yp_comment_t *comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
VALUE location_argv[] = {
source,
LONG2FIX(comment->start - parser->start),
@ -132,13 +132,13 @@ parser_comments(yp_parser_t *parser, VALUE source) {
VALUE type;
switch (comment->type) {
case YP_COMMENT_INLINE:
case PM_COMMENT_INLINE:
type = ID2SYM(rb_intern("inline"));
break;
case YP_COMMENT_EMBDOC:
case PM_COMMENT_EMBDOC:
type = ID2SYM(rb_intern("embdoc"));
break;
case YP_COMMENT___END__:
case PM_COMMENT___END__:
type = ID2SYM(rb_intern("__END__"));
break;
default:
@ -146,8 +146,8 @@ parser_comments(yp_parser_t *parser, VALUE source) {
break;
}
VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cYARPLocation) };
rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cYARPComment));
VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
}
return comments;
@ -155,11 +155,11 @@ parser_comments(yp_parser_t *parser, VALUE source) {
// Extract the errors out of the parser into an array.
static VALUE
parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
VALUE errors = rb_ary_new();
yp_diagnostic_t *error;
pm_diagnostic_t *error;
for (error = (yp_diagnostic_t *) parser->error_list.head; error != NULL; error = (yp_diagnostic_t *) error->node.next) {
for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
VALUE location_argv[] = {
source,
LONG2FIX(error->start - parser->start),
@ -168,10 +168,10 @@ parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
VALUE error_argv[] = {
rb_enc_str_new_cstr(error->message, encoding),
rb_class_new_instance(3, location_argv, rb_cYARPLocation)
rb_class_new_instance(3, location_argv, rb_cPrismLocation)
};
rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cYARPParseError));
rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cPrismParseError));
}
return errors;
@ -179,11 +179,11 @@ parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
// Extract the warnings out of the parser into an array.
static VALUE
parser_warnings(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
VALUE warnings = rb_ary_new();
yp_diagnostic_t *warning;
pm_diagnostic_t *warning;
for (warning = (yp_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (yp_diagnostic_t *) warning->node.next) {
for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
VALUE location_argv[] = {
source,
LONG2FIX(warning->start - parser->start),
@ -192,10 +192,10 @@ parser_warnings(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
VALUE warning_argv[] = {
rb_enc_str_new_cstr(warning->message, encoding),
rb_class_new_instance(3, location_argv, rb_cYARPLocation)
rb_class_new_instance(3, location_argv, rb_cPrismLocation)
};
rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cYARPParseWarning));
rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cPrismParseWarning));
}
return warnings;
@ -218,11 +218,11 @@ typedef struct {
// token is found. Once found, we initialize a new instance of Token and push it
// onto the tokens array.
static void
parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
VALUE yields = rb_ary_new_capa(2);
rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
rb_ary_push(yields, pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
rb_ary_push(yields, INT2FIX(parser->lex_state));
rb_ary_push(parse_lex_data->tokens, yields);
@ -232,7 +232,7 @@ parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
// the top of the file. We use it to update the encoding that we are using to
// create tokens.
static void
parse_lex_encoding_changed_callback(yp_parser_t *parser) {
parse_lex_encoding_changed_callback(pm_parser_t *parser) {
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
@ -254,14 +254,14 @@ parse_lex_encoding_changed_callback(yp_parser_t *parser) {
// Parse the given input and return a ParseResult containing just the tokens or
// the nodes and tokens.
static VALUE
parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
yp_parser_t parser;
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
VALUE offsets = rb_ary_new();
VALUE source_argv[] = { rb_str_new((const char *) yp_string_source(input), yp_string_length(input)), offsets };
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
parse_lex_data_t parse_lex_data = {
.source = source,
@ -270,13 +270,13 @@ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
};
parse_lex_data_t *data = &parse_lex_data;
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
.data = (void *) data,
.callback = parse_lex_token,
};
parser.lex_callback = &lex_callback;
yp_node_t *node = yp_parse(&parser);
pm_node_t *node = pm_parse(&parser);
// Here we need to update the source range to have the correct newline
// offsets. We do it here because we've already created the object and given
@ -288,7 +288,7 @@ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
VALUE value;
if (return_nodes) {
value = rb_ary_new_capa(2);
rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
rb_ary_push(value, parse_lex_data.tokens);
} else {
value = parse_lex_data.tokens;
@ -302,9 +302,9 @@ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
source
};
yp_node_destroy(&parser, node);
yp_parser_free(&parser);
return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
return rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
}
// Return an array of tokens corresponding to the given string.
@ -314,7 +314,7 @@ lex(int argc, VALUE *argv, VALUE self) {
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
yp_string_t input;
pm_string_t input;
input_load_string(&input, string);
return parse_lex_input(&input, check_string(filepath), false);
@ -323,13 +323,13 @@ lex(int argc, VALUE *argv, VALUE self) {
// Return an array of tokens corresponding to the given file.
static VALUE
lex_file(VALUE self, VALUE filepath) {
yp_string_t input;
pm_string_t input;
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;
if (!pm_string_mapped_init(&input, checked)) return Qnil;
VALUE value = parse_lex_input(&input, checked, false);
yp_string_free(&input);
pm_string_free(&input);
return value;
}
@ -340,26 +340,26 @@ lex_file(VALUE self, VALUE filepath) {
// Parse the given input and return a ParseResult instance.
static VALUE
parse_input(yp_string_t *input, const char *filepath) {
yp_parser_t parser;
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
parse_input(pm_string_t *input, const char *filepath) {
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
yp_node_t *node = yp_parse(&parser);
pm_node_t *node = pm_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
VALUE source = yp_source_new(&parser, encoding);
VALUE source = pm_source_new(&parser, encoding);
VALUE result_argv[] = {
yp_ast_new(&parser, node, encoding),
pm_ast_new(&parser, node, encoding),
parser_comments(&parser, source),
parser_errors(&parser, encoding, source),
parser_warnings(&parser, encoding, source),
source
};
VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
VALUE result = rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
yp_node_destroy(&parser, node);
yp_parser_free(&parser);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
return result;
}
@ -371,19 +371,19 @@ parse(int argc, VALUE *argv, VALUE self) {
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
yp_string_t input;
pm_string_t input;
input_load_string(&input, string);
#ifdef YARP_DEBUG_MODE_BUILD
size_t length = yp_string_length(&input);
#ifdef PRISM_DEBUG_MODE_BUILD
size_t length = pm_string_length(&input);
char* dup = malloc(length);
memcpy(dup, yp_string_source(&input), length);
yp_string_constant_init(&input, dup, length);
memcpy(dup, pm_string_source(&input), length);
pm_string_constant_init(&input, dup, length);
#endif
VALUE value = parse_input(&input, check_string(filepath));
#ifdef YARP_DEBUG_MODE_BUILD
#ifdef PRISM_DEBUG_MODE_BUILD
free(dup);
#endif
@ -393,13 +393,13 @@ parse(int argc, VALUE *argv, VALUE self) {
// Parse the given file and return a ParseResult instance.
static VALUE
parse_file(VALUE self, VALUE filepath) {
yp_string_t input;
pm_string_t input;
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;
if (!pm_string_mapped_init(&input, checked)) return Qnil;
VALUE value = parse_input(&input, checked);
yp_string_free(&input);
pm_string_free(&input);
return value;
}
@ -411,11 +411,11 @@ parse_lex(int argc, VALUE *argv, VALUE self) {
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
yp_string_t input;
pm_string_t input;
input_load_string(&input, string);
VALUE value = parse_lex_input(&input, check_string(filepath), true);
yp_string_free(&input);
pm_string_free(&input);
return value;
}
@ -423,13 +423,13 @@ parse_lex(int argc, VALUE *argv, VALUE self) {
// Parse and lex the given file and return a ParseResult instance.
static VALUE
parse_lex_file(VALUE self, VALUE filepath) {
yp_string_t input;
pm_string_t input;
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;
if (!pm_string_mapped_init(&input, checked)) return Qnil;
VALUE value = parse_lex_input(&input, checked, true);
yp_string_free(&input);
pm_string_free(&input);
return value;
}
@ -439,40 +439,40 @@ parse_lex_file(VALUE self, VALUE filepath) {
/******************************************************************************/
// Returns an array of strings corresponding to the named capture groups in the
// given source string. If YARP was unable to parse the regular expression, this
// given source string. If prism was unable to parse the regular expression, this
// function returns nil.
static VALUE
named_captures(VALUE self, VALUE source) {
yp_string_list_t string_list;
yp_string_list_init(&string_list);
pm_string_list_t string_list;
pm_string_list_init(&string_list);
if (!yp_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
yp_string_list_free(&string_list);
if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
pm_string_list_free(&string_list);
return Qnil;
}
VALUE names = rb_ary_new();
for (size_t index = 0; index < string_list.length; index++) {
const yp_string_t *string = &string_list.strings[index];
rb_ary_push(names, rb_str_new((const char *) yp_string_source(string), yp_string_length(string)));
const pm_string_t *string = &string_list.strings[index];
rb_ary_push(names, rb_str_new((const char *) pm_string_source(string), pm_string_length(string)));
}
yp_string_list_free(&string_list);
pm_string_list_free(&string_list);
return names;
}
// Accepts a source string and a type of unescaping and returns the unescaped
// version.
static VALUE
unescape(VALUE source, yp_unescape_type_t unescape_type) {
yp_string_t result;
unescape(VALUE source, pm_unescape_type_t unescape_type) {
pm_string_t result;
if (yp_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
VALUE str = rb_str_new((const char *) yp_string_source(&result), yp_string_length(&result));
yp_string_free(&result);
if (pm_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
VALUE str = rb_str_new((const char *) pm_string_source(&result), pm_string_length(&result));
pm_string_free(&result);
return str;
} else {
yp_string_free(&result);
pm_string_free(&result);
return Qnil;
}
}
@ -481,41 +481,41 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
// consistent API.
static VALUE
unescape_none(VALUE self, VALUE source) {
return unescape(source, YP_UNESCAPE_NONE);
return unescape(source, PM_UNESCAPE_NONE);
}
// Minimally unescape the given string. This means effectively unescaping just
// the quotes of a string. Returns the unescaped string.
static VALUE
unescape_minimal(VALUE self, VALUE source) {
return unescape(source, YP_UNESCAPE_MINIMAL);
return unescape(source, PM_UNESCAPE_MINIMAL);
}
// Unescape the given string minimally plus whitespace. Returns the unescaped string.
static VALUE
unescape_whitespace(VALUE self, VALUE source) {
return unescape(source, YP_UNESCAPE_WHITESPACE);
return unescape(source, PM_UNESCAPE_WHITESPACE);
}
// Unescape everything in the given string. Return the unescaped string.
static VALUE
unescape_all(VALUE self, VALUE source) {
return unescape(source, YP_UNESCAPE_ALL);
return unescape(source, PM_UNESCAPE_ALL);
}
// Return a hash of information about the given source string's memory usage.
static VALUE
memsize(VALUE self, VALUE string) {
yp_parser_t parser;
pm_parser_t parser;
size_t length = RSTRING_LEN(string);
yp_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
pm_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
yp_node_t *node = yp_parse(&parser);
yp_memsize_t memsize;
yp_node_memsize(node, &memsize);
pm_node_t *node = pm_parse(&parser);
pm_memsize_t memsize;
pm_node_memsize(node, &memsize);
yp_node_destroy(&parser, node);
yp_parser_free(&parser);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
VALUE result = rb_hash_new();
rb_hash_aset(result, ID2SYM(rb_intern("length")), INT2FIX(length));
@ -528,19 +528,19 @@ memsize(VALUE self, VALUE string) {
// parser for memory and speed.
static VALUE
profile_file(VALUE self, VALUE filepath) {
yp_string_t input;
pm_string_t input;
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;
if (!pm_string_mapped_init(&input, checked)) return Qnil;
yp_parser_t parser;
yp_parser_init(&parser, yp_string_source(&input), yp_string_length(&input), checked);
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked);
yp_node_t *node = yp_parse(&parser);
yp_node_destroy(&parser, node);
yp_parser_free(&parser);
pm_node_t *node = pm_parse(&parser);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
yp_string_free(&input);
pm_string_free(&input);
return Qnil;
}
@ -549,18 +549,18 @@ profile_file(VALUE self, VALUE filepath) {
// path since it is used by client libraries.
static VALUE
parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
yp_string_t input;
yp_buffer_t buffer;
yp_buffer_init(&buffer);
pm_string_t input;
pm_buffer_t buffer;
pm_buffer_init(&buffer);
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;
if (!pm_string_mapped_init(&input, checked)) return Qnil;
yp_parse_serialize(yp_string_source(&input), yp_string_length(&input), &buffer, check_string(metadata));
VALUE result = rb_str_new(yp_buffer_value(&buffer), yp_buffer_length(&buffer));
pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
yp_string_free(&input);
yp_buffer_free(&buffer);
pm_string_free(&input);
pm_buffer_free(&buffer);
return result;
}
@ -569,58 +569,58 @@ parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
/******************************************************************************/
RUBY_FUNC_EXPORTED void
Init_yarp(void) {
// Make sure that the YARP library version matches the expected version.
Init_prism(void) {
// Make sure that the prism library version matches the expected version.
// Otherwise something was compiled incorrectly.
if (strcmp(yp_version(), EXPECTED_YARP_VERSION) != 0) {
if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) {
rb_raise(
rb_eRuntimeError,
"The YARP library version (%s) does not match the expected version (%s)",
yp_version(),
EXPECTED_YARP_VERSION
"The prism library version (%s) does not match the expected version (%s)",
pm_version(),
EXPECTED_PRISM_VERSION
);
}
// Grab up references to all of the constants that we're going to need to
// reference throughout this extension.
rb_cYARP = rb_define_module("YARP");
rb_cYARPNode = rb_define_class_under(rb_cYARP, "Node", rb_cObject);
rb_cYARPSource = rb_define_class_under(rb_cYARP, "Source", rb_cObject);
rb_cYARPToken = rb_define_class_under(rb_cYARP, "Token", rb_cObject);
rb_cYARPLocation = rb_define_class_under(rb_cYARP, "Location", rb_cObject);
rb_cYARPComment = rb_define_class_under(rb_cYARP, "Comment", rb_cObject);
rb_cYARPParseError = rb_define_class_under(rb_cYARP, "ParseError", rb_cObject);
rb_cYARPParseWarning = rb_define_class_under(rb_cYARP, "ParseWarning", rb_cObject);
rb_cYARPParseResult = rb_define_class_under(rb_cYARP, "ParseResult", rb_cObject);
rb_cPrism = rb_define_module("Prism");
rb_cPrismNode = rb_define_class_under(rb_cPrism, "Node", rb_cObject);
rb_cPrismSource = rb_define_class_under(rb_cPrism, "Source", rb_cObject);
rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
// Define the version string here so that we can use the constants defined
// in yarp.h.
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
// in prism.h.
rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CExtension")));
// First, the functions that have to do with lexing and parsing.
rb_define_singleton_method(rb_cYARP, "dump", dump, -1);
rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
rb_define_singleton_method(rb_cYARP, "lex", lex, -1);
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
// Next, the functions that will be called by the parser to perform various
// internal tasks. We expose these to make them easier to test.
VALUE rb_cYARPDebug = rb_define_module_under(rb_cYARP, "Debug");
rb_define_singleton_method(rb_cYARPDebug, "named_captures", named_captures, 1);
rb_define_singleton_method(rb_cYARPDebug, "unescape_none", unescape_none, 1);
rb_define_singleton_method(rb_cYARPDebug, "unescape_minimal", unescape_minimal, 1);
rb_define_singleton_method(rb_cYARPDebug, "unescape_whitespace", unescape_whitespace, 1);
rb_define_singleton_method(rb_cYARPDebug, "unescape_all", unescape_all, 1);
rb_define_singleton_method(rb_cYARPDebug, "memsize", memsize, 1);
rb_define_singleton_method(rb_cYARPDebug, "profile_file", profile_file, 1);
rb_define_singleton_method(rb_cYARPDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
rb_define_singleton_method(rb_cPrismDebug, "unescape_none", unescape_none, 1);
rb_define_singleton_method(rb_cPrismDebug, "unescape_minimal", unescape_minimal, 1);
rb_define_singleton_method(rb_cPrismDebug, "unescape_whitespace", unescape_whitespace, 1);
rb_define_singleton_method(rb_cPrismDebug, "unescape_all", unescape_all, 1);
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
// Next, initialize the other APIs.
Init_yarp_api_node();
Init_yarp_pack();
Init_prism_api_node();
Init_prism_pack();
}

Просмотреть файл

@ -1,18 +1,18 @@
#ifndef YARP_EXT_NODE_H
#define YARP_EXT_NODE_H
#ifndef PRISM_EXT_NODE_H
#define PRISM_EXT_NODE_H
#define EXPECTED_YARP_VERSION "0.12.0"
#define EXPECTED_PRISM_VERSION "0.12.0"
#include <ruby.h>
#include <ruby/encoding.h>
#include "yarp.h"
#include "prism.h"
VALUE yp_source_new(yp_parser_t *parser, rb_encoding *encoding);
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
void Init_yarp_api_node(void);
void Init_yarp_pack(void);
YP_EXPORTED_FUNCTION void Init_yarp(void);
void Init_prism_api_node(void);
void Init_prism_pack(void);
PRISM_EXPORTED_FUNCTION void Init_prism(void);
#endif

Просмотреть файл

@ -1,42 +1,41 @@
#ifndef YARP_NODE_H
#define YARP_NODE_H
#ifndef PRISM_NODE_H
#define PRISM_NODE_H
#include "yarp/defines.h"
#include "yarp/parser.h"
#include "prism/defines.h"
#include "prism/parser.h"
// Append a new node onto the end of the node list.
void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
// Clear the node but preserve the location.
void yp_node_clear(yp_node_t *node);
void pm_node_clear(pm_node_t *node);
// Deallocate a node and all of its children.
YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
// This struct stores the information gathered by the yp_node_memsize function.
// This struct stores the information gathered by the pm_node_memsize function.
// It contains both the memory footprint and additional metadata about the
// shape of the tree.
typedef struct {
size_t memsize;
size_t node_count;
} yp_memsize_t;
} pm_memsize_t;
// Calculates the memory footprint of a given node.
YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
// Returns a string representation of the given node type.
YP_EXPORTED_FUNCTION const char * yp_node_type_to_str(yp_node_type_t node_type);
PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
#define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
#define PM_EMPTY_NODE_LIST ((pm_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
#endif // YARP_NODE_H
// ScopeNodes are helper nodes, and will never be part of the AST. We manually
// declare them here to avoid generating them.
typedef struct pm_scope_node {
pm_node_t base;
struct pm_parameters_node *parameters;
pm_node_t *body;
pm_constant_id_list_t locals;
} pm_scope_node_t;
// ScopeNodes are helper nodes, and will never
// be part of the AST. We manually declare them
// here to avoid generating them
typedef struct yp_scope_node {
yp_node_t base;
struct yp_parameters_node *parameters;
yp_node_t *body;
yp_constant_id_list_t locals;
} yp_scope_node_t;
#endif // PRISM_NODE_H

Просмотреть файл

@ -1,4 +1,4 @@
#include "yarp/pack.h"
#include "prism/pack.h"
#include <stdbool.h>
#include <errno.h>
@ -6,25 +6,25 @@
static uintmax_t
strtoumaxc(const char **format);
YP_EXPORTED_FUNCTION yp_pack_result
yp_pack_parse(yp_pack_variant variant, const char **format, const char *format_end,
yp_pack_type *type, yp_pack_signed *signed_type, yp_pack_endian *endian, yp_pack_size *size,
yp_pack_length_type *length_type, uint64_t *length, yp_pack_encoding *encoding) {
PRISM_EXPORTED_FUNCTION pm_pack_result
pm_pack_parse(pm_pack_variant variant, const char **format, const char *format_end,
pm_pack_type *type, pm_pack_signed *signed_type, pm_pack_endian *endian, pm_pack_size *size,
pm_pack_length_type *length_type, uint64_t *length, pm_pack_encoding *encoding) {
if (*encoding == YP_PACK_ENCODING_START) {
*encoding = YP_PACK_ENCODING_US_ASCII;
if (*encoding == PM_PACK_ENCODING_START) {
*encoding = PM_PACK_ENCODING_US_ASCII;
}
if (*format == format_end) {
*type = YP_PACK_END;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*length_type = YP_PACK_LENGTH_NA;
return YP_PACK_OK;
*type = PM_PACK_END;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
*length_type = PM_PACK_LENGTH_NA;
return PM_PACK_OK;
}
*length_type = YP_PACK_LENGTH_FIXED;
*length_type = PM_PACK_LENGTH_FIXED;
*length = 1;
bool length_changed_allowed = true;
@ -37,268 +37,268 @@ yp_pack_parse(yp_pack_variant variant, const char **format, const char *format_e
case '\v':
case '\f':
case '\r':
*type = YP_PACK_SPACE;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*length_type = YP_PACK_LENGTH_NA;
*type = PM_PACK_SPACE;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
*length_type = PM_PACK_LENGTH_NA;
*length = 0;
return YP_PACK_OK;
return PM_PACK_OK;
case '#':
while ((*format < format_end) && (**format != '\n')) {
(*format)++;
}
*type = YP_PACK_COMMENT;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*length_type = YP_PACK_LENGTH_NA;
*type = PM_PACK_COMMENT;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
*length_type = PM_PACK_LENGTH_NA;
*length = 0;
return YP_PACK_OK;
return PM_PACK_OK;
case 'C':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_AGNOSTIC_ENDIAN;
*size = YP_PACK_SIZE_8;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_AGNOSTIC_ENDIAN;
*size = PM_PACK_SIZE_8;
break;
case 'S':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_16;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_16;
break;
case 'L':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_32;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_32;
break;
case 'Q':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_64;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_64;
break;
case 'J':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_P;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_P;
break;
case 'c':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_SIGNED;
*endian = YP_PACK_AGNOSTIC_ENDIAN;
*size = YP_PACK_SIZE_8;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_SIGNED;
*endian = PM_PACK_AGNOSTIC_ENDIAN;
*size = PM_PACK_SIZE_8;
break;
case 's':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_SIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_16;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_SIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_16;
break;
case 'l':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_SIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_32;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_SIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_32;
break;
case 'q':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_SIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_64;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_SIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_64;
break;
case 'j':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_SIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_P;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_SIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_P;
break;
case 'I':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_INT;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_INT;
break;
case 'i':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_SIGNED;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_INT;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_SIGNED;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_INT;
break;
case 'n':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_BIG_ENDIAN;
*size = YP_PACK_SIZE_16;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_BIG_ENDIAN;
*size = PM_PACK_SIZE_16;
length_changed_allowed = false;
break;
case 'N':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_BIG_ENDIAN;
*size = YP_PACK_SIZE_32;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_BIG_ENDIAN;
*size = PM_PACK_SIZE_32;
length_changed_allowed = false;
break;
case 'v':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_LITTLE_ENDIAN;
*size = YP_PACK_SIZE_16;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_LITTLE_ENDIAN;
*size = PM_PACK_SIZE_16;
length_changed_allowed = false;
break;
case 'V':
*type = YP_PACK_INTEGER;
*signed_type = YP_PACK_UNSIGNED;
*endian = YP_PACK_LITTLE_ENDIAN;
*size = YP_PACK_SIZE_32;
*type = PM_PACK_INTEGER;
*signed_type = PM_PACK_UNSIGNED;
*endian = PM_PACK_LITTLE_ENDIAN;
*size = PM_PACK_SIZE_32;
length_changed_allowed = false;
break;
case 'U':
*type = YP_PACK_UTF8;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_UTF8;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'w':
*type = YP_PACK_BER;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_BER;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'D':
case 'd':
*type = YP_PACK_FLOAT;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_64;
*type = PM_PACK_FLOAT;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_64;
break;
case 'F':
case 'f':
*type = YP_PACK_FLOAT;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_NATIVE_ENDIAN;
*size = YP_PACK_SIZE_32;
*type = PM_PACK_FLOAT;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_NATIVE_ENDIAN;
*size = PM_PACK_SIZE_32;
break;
case 'E':
*type = YP_PACK_FLOAT;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_LITTLE_ENDIAN;
*size = YP_PACK_SIZE_64;
*type = PM_PACK_FLOAT;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_LITTLE_ENDIAN;
*size = PM_PACK_SIZE_64;
break;
case 'e':
*type = YP_PACK_FLOAT;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_LITTLE_ENDIAN;
*size = YP_PACK_SIZE_32;
*type = PM_PACK_FLOAT;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_LITTLE_ENDIAN;
*size = PM_PACK_SIZE_32;
break;
case 'G':
*type = YP_PACK_FLOAT;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_BIG_ENDIAN;
*size = YP_PACK_SIZE_64;
*type = PM_PACK_FLOAT;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_BIG_ENDIAN;
*size = PM_PACK_SIZE_64;
break;
case 'g':
*type = YP_PACK_FLOAT;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_BIG_ENDIAN;
*size = YP_PACK_SIZE_32;
*type = PM_PACK_FLOAT;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_BIG_ENDIAN;
*size = PM_PACK_SIZE_32;
break;
case 'A':
*type = YP_PACK_STRING_SPACE_PADDED;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_SPACE_PADDED;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'a':
*type = YP_PACK_STRING_NULL_PADDED;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_NULL_PADDED;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'Z':
*type = YP_PACK_STRING_NULL_TERMINATED;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_NULL_TERMINATED;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'B':
*type = YP_PACK_STRING_MSB;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_MSB;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'b':
*type = YP_PACK_STRING_LSB;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_LSB;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'H':
*type = YP_PACK_STRING_HEX_HIGH;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_HEX_HIGH;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'h':
*type = YP_PACK_STRING_HEX_LOW;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_HEX_LOW;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'u':
*type = YP_PACK_STRING_UU;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_UU;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'M':
*type = YP_PACK_STRING_MIME;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_MIME;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'm':
*type = YP_PACK_STRING_BASE64;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_BASE64;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'P':
*type = YP_PACK_STRING_FIXED;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_FIXED;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'p':
*type = YP_PACK_STRING_POINTER;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_STRING_POINTER;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case '@':
*type = YP_PACK_MOVE;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_MOVE;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'X':
*type = YP_PACK_BACK;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_BACK;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case 'x':
*type = YP_PACK_NULL;
*signed_type = YP_PACK_SIGNED_NA;
*endian = YP_PACK_ENDIAN_NA;
*size = YP_PACK_SIZE_NA;
*type = PM_PACK_NULL;
*signed_type = PM_PACK_SIGNED_NA;
*endian = PM_PACK_ENDIAN_NA;
*size = PM_PACK_SIZE_NA;
break;
case '%':
return YP_PACK_ERROR_UNSUPPORTED_DIRECTIVE;
return PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE;
default:
return YP_PACK_ERROR_UNKNOWN_DIRECTIVE;
return PM_PACK_ERROR_UNKNOWN_DIRECTIVE;
}
bool explicit_endian = false;
@ -308,44 +308,44 @@ yp_pack_parse(yp_pack_variant variant, const char **format, const char *format_e
case '_':
case '!':
(*format)++;
if (*type != YP_PACK_INTEGER || !length_changed_allowed) {
return YP_PACK_ERROR_BANG_NOT_ALLOWED;
if (*type != PM_PACK_INTEGER || !length_changed_allowed) {
return PM_PACK_ERROR_BANG_NOT_ALLOWED;
}
switch (*size) {
case YP_PACK_SIZE_SHORT:
case YP_PACK_SIZE_INT:
case YP_PACK_SIZE_LONG:
case YP_PACK_SIZE_LONG_LONG:
case PM_PACK_SIZE_SHORT:
case PM_PACK_SIZE_INT:
case PM_PACK_SIZE_LONG:
case PM_PACK_SIZE_LONG_LONG:
break;
case YP_PACK_SIZE_16:
*size = YP_PACK_SIZE_SHORT;
case PM_PACK_SIZE_16:
*size = PM_PACK_SIZE_SHORT;
break;
case YP_PACK_SIZE_32:
*size = YP_PACK_SIZE_LONG;
case PM_PACK_SIZE_32:
*size = PM_PACK_SIZE_LONG;
break;
case YP_PACK_SIZE_64:
*size = YP_PACK_SIZE_LONG_LONG;
case PM_PACK_SIZE_64:
*size = PM_PACK_SIZE_LONG_LONG;
break;
case YP_PACK_SIZE_P:
case PM_PACK_SIZE_P:
break;
default:
return YP_PACK_ERROR_BANG_NOT_ALLOWED;
return PM_PACK_ERROR_BANG_NOT_ALLOWED;
}
break;
case '<':
(*format)++;
if (explicit_endian) {
return YP_PACK_ERROR_DOUBLE_ENDIAN;
return PM_PACK_ERROR_DOUBLE_ENDIAN;
}
*endian = YP_PACK_LITTLE_ENDIAN;
*endian = PM_PACK_LITTLE_ENDIAN;
explicit_endian = true;
break;
case '>':
(*format)++;
if (explicit_endian) {
return YP_PACK_ERROR_DOUBLE_ENDIAN;
return PM_PACK_ERROR_DOUBLE_ENDIAN;
}
*endian = YP_PACK_BIG_ENDIAN;
*endian = PM_PACK_BIG_ENDIAN;
explicit_endian = true;
break;
default:
@ -355,64 +355,64 @@ yp_pack_parse(yp_pack_variant variant, const char **format, const char *format_e
exit_modifier_loop:
if (variant == YP_PACK_VARIANT_UNPACK && *type == YP_PACK_MOVE) {
if (variant == PM_PACK_VARIANT_UNPACK && *type == PM_PACK_MOVE) {
*length = 0;
}
if (*format < format_end) {
if (**format == '*') {
switch (*type) {
case YP_PACK_NULL:
case YP_PACK_BACK:
case PM_PACK_NULL:
case PM_PACK_BACK:
switch (variant) {
case YP_PACK_VARIANT_PACK:
*length_type = YP_PACK_LENGTH_FIXED;
case PM_PACK_VARIANT_PACK:
*length_type = PM_PACK_LENGTH_FIXED;
break;
case YP_PACK_VARIANT_UNPACK:
*length_type = YP_PACK_LENGTH_MAX;
case PM_PACK_VARIANT_UNPACK:
*length_type = PM_PACK_LENGTH_MAX;
break;
}
*length = 0;
break;
case YP_PACK_MOVE:
case PM_PACK_MOVE:
switch (variant) {
case YP_PACK_VARIANT_PACK:
*length_type = YP_PACK_LENGTH_FIXED;
case PM_PACK_VARIANT_PACK:
*length_type = PM_PACK_LENGTH_FIXED;
break;
case YP_PACK_VARIANT_UNPACK:
*length_type = YP_PACK_LENGTH_RELATIVE;
case PM_PACK_VARIANT_UNPACK:
*length_type = PM_PACK_LENGTH_RELATIVE;
break;
}
*length = 0;
break;
case YP_PACK_STRING_UU:
*length_type = YP_PACK_LENGTH_FIXED;
case PM_PACK_STRING_UU:
*length_type = PM_PACK_LENGTH_FIXED;
*length = 0;
break;
case YP_PACK_STRING_FIXED:
case PM_PACK_STRING_FIXED:
switch (variant) {
case YP_PACK_VARIANT_PACK:
*length_type = YP_PACK_LENGTH_FIXED;
case PM_PACK_VARIANT_PACK:
*length_type = PM_PACK_LENGTH_FIXED;
*length = 1;
break;
case YP_PACK_VARIANT_UNPACK:
*length_type = YP_PACK_LENGTH_MAX;
case PM_PACK_VARIANT_UNPACK:
*length_type = PM_PACK_LENGTH_MAX;
*length = 0;
break;
}
break;
case YP_PACK_STRING_MIME:
case YP_PACK_STRING_BASE64:
*length_type = YP_PACK_LENGTH_FIXED;
case PM_PACK_STRING_MIME:
case PM_PACK_STRING_BASE64:
*length_type = PM_PACK_LENGTH_FIXED;
*length = 1;
break;
default:
*length_type = YP_PACK_LENGTH_MAX;
*length_type = PM_PACK_LENGTH_MAX;
*length = 0;
break;
}
@ -420,59 +420,59 @@ exit_modifier_loop:
(*format)++;
} else if (**format >= '0' && **format <= '9') {
errno = 0;
*length_type = YP_PACK_LENGTH_FIXED;
*length_type = PM_PACK_LENGTH_FIXED;
#if UINTMAX_MAX < UINT64_MAX
#error "YARP's design assumes uintmax_t is at least as large as uint64_t"
#error "prism's design assumes uintmax_t is at least as large as uint64_t"
#endif
uintmax_t length_max = strtoumaxc(format);
if (errno || length_max > UINT64_MAX) {
return YP_PACK_ERROR_LENGTH_TOO_BIG;
return PM_PACK_ERROR_LENGTH_TOO_BIG;
}
*length = (uint64_t) length_max;
}
}
switch (*type) {
case YP_PACK_UTF8:
case PM_PACK_UTF8:
/* if encoding is US-ASCII, upgrade to UTF-8 */
if (*encoding == YP_PACK_ENCODING_US_ASCII) {
*encoding = YP_PACK_ENCODING_UTF_8;
if (*encoding == PM_PACK_ENCODING_US_ASCII) {
*encoding = PM_PACK_ENCODING_UTF_8;
}
break;
case YP_PACK_STRING_MIME:
case YP_PACK_STRING_BASE64:
case YP_PACK_STRING_UU:
case PM_PACK_STRING_MIME:
case PM_PACK_STRING_BASE64:
case PM_PACK_STRING_UU:
/* keep US-ASCII (do nothing) */
break;
default:
/* fall back to BINARY */
*encoding = YP_PACK_ENCODING_ASCII_8BIT;
*encoding = PM_PACK_ENCODING_ASCII_8BIT;
break;
}
return YP_PACK_OK;
return PM_PACK_OK;
}
YP_EXPORTED_FUNCTION size_t
yp_size_to_native(yp_pack_size size) {
PRISM_EXPORTED_FUNCTION size_t
pm_size_to_native(pm_pack_size size) {
switch (size) {
case YP_PACK_SIZE_SHORT:
case PM_PACK_SIZE_SHORT:
return sizeof(short);
case YP_PACK_SIZE_INT:
case PM_PACK_SIZE_INT:
return sizeof(int);
case YP_PACK_SIZE_LONG:
case PM_PACK_SIZE_LONG:
return sizeof(long);
case YP_PACK_SIZE_LONG_LONG:
case PM_PACK_SIZE_LONG_LONG:
return sizeof(long long);
case YP_PACK_SIZE_8:
case PM_PACK_SIZE_8:
return 1;
case YP_PACK_SIZE_16:
case PM_PACK_SIZE_16:
return 2;
case YP_PACK_SIZE_32:
case PM_PACK_SIZE_32:
return 4;
case YP_PACK_SIZE_64:
case PM_PACK_SIZE_64:
return 8;
case YP_PACK_SIZE_P:
case PM_PACK_SIZE_P:
return sizeof(void *);
default:
return 0;

Просмотреть файл

@ -1,141 +1,141 @@
#ifndef YARP_PACK_H
#define YARP_PACK_H
#ifndef PRISM_PACK_H
#define PRISM_PACK_H
#include "yarp/defines.h"
#include "prism/defines.h"
#include <stdint.h>
#include <stdlib.h>
typedef enum yp_pack_version {
YP_PACK_VERSION_3_2_0
} yp_pack_version;
// The version of Ruby whose pack/unpack directives are being described. Only
// one version is enumerated here.
typedef enum pm_pack_version {
PM_PACK_VERSION_3_2_0
} pm_pack_version;
typedef enum yp_pack_variant {
YP_PACK_VARIANT_PACK,
YP_PACK_VARIANT_UNPACK
} yp_pack_variant;
// Whether a format string is being interpreted for pack or for unpack. The
// parser uses this to pick the length type for some directives followed by '*'.
typedef enum pm_pack_variant {
PM_PACK_VARIANT_PACK,
PM_PACK_VARIANT_UNPACK
} pm_pack_variant;
typedef enum yp_pack_type {
YP_PACK_SPACE,
YP_PACK_COMMENT,
YP_PACK_INTEGER,
YP_PACK_UTF8,
YP_PACK_BER,
YP_PACK_FLOAT,
YP_PACK_STRING_SPACE_PADDED,
YP_PACK_STRING_NULL_PADDED,
YP_PACK_STRING_NULL_TERMINATED,
YP_PACK_STRING_MSB,
YP_PACK_STRING_LSB,
YP_PACK_STRING_HEX_HIGH,
YP_PACK_STRING_HEX_LOW,
YP_PACK_STRING_UU,
YP_PACK_STRING_MIME,
YP_PACK_STRING_BASE64,
YP_PACK_STRING_FIXED,
YP_PACK_STRING_POINTER,
YP_PACK_MOVE,
YP_PACK_BACK,
YP_PACK_NULL,
YP_PACK_END
} yp_pack_type;
// The kind of directive that was parsed out of a pack/unpack format string.
// The directive characters noted below are the ones pm_pack_parse maps to each
// type; consult the Ruby documentation for what each directive means.
typedef enum pm_pack_type {
PM_PACK_SPACE,
PM_PACK_COMMENT,
PM_PACK_INTEGER, // integer directives, e.g. 'I', 'i', 'n', 'N', 'v', 'V'
PM_PACK_UTF8, // 'U'
PM_PACK_BER, // 'w'
PM_PACK_FLOAT, // 'D', 'd', 'F', 'f', 'E', 'e', 'G', 'g'
PM_PACK_STRING_SPACE_PADDED, // 'A'
PM_PACK_STRING_NULL_PADDED, // 'a'
PM_PACK_STRING_NULL_TERMINATED, // 'Z'
PM_PACK_STRING_MSB, // 'B'
PM_PACK_STRING_LSB, // 'b'
PM_PACK_STRING_HEX_HIGH, // 'H'
PM_PACK_STRING_HEX_LOW, // 'h'
PM_PACK_STRING_UU, // 'u'
PM_PACK_STRING_MIME, // 'M'
PM_PACK_STRING_BASE64, // 'm'
PM_PACK_STRING_FIXED, // 'P'
PM_PACK_STRING_POINTER, // 'p'
PM_PACK_MOVE, // '@'
PM_PACK_BACK, // 'X'
PM_PACK_NULL, // 'x'
PM_PACK_END
} pm_pack_type;
typedef enum yp_pack_signed {
YP_PACK_UNSIGNED,
YP_PACK_SIGNED,
YP_PACK_SIGNED_NA
} yp_pack_signed;
// The signedness of a directive's value. PM_PACK_SIGNED_NA is what the parser
// reports for directives that are not integers (floats, strings, UTF-8, BER,
// and the positioning directives).
typedef enum pm_pack_signed {
PM_PACK_UNSIGNED,
PM_PACK_SIGNED,
PM_PACK_SIGNED_NA
} pm_pack_signed;
typedef enum yp_pack_endian {
YP_PACK_AGNOSTIC_ENDIAN,
YP_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
YP_PACK_BIG_ENDIAN, // aka 'network', or 'N'
YP_PACK_NATIVE_ENDIAN,
YP_PACK_ENDIAN_NA
} yp_pack_endian;
// The byte order of a directive's value. A '<' or '>' modifier in the format
// string overrides the directive's default with little or big endian.
typedef enum pm_pack_endian {
PM_PACK_AGNOSTIC_ENDIAN,
PM_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
PM_PACK_BIG_ENDIAN, // aka 'network', or 'N'
PM_PACK_NATIVE_ENDIAN, // the default for directives such as 'I', 'i', 'D', 'F'
PM_PACK_ENDIAN_NA // reported for directives with no byte order (strings, UTF-8, BER, moves)
} pm_pack_endian;
typedef enum yp_pack_size {
YP_PACK_SIZE_SHORT,
YP_PACK_SIZE_INT,
YP_PACK_SIZE_LONG,
YP_PACK_SIZE_LONG_LONG,
YP_PACK_SIZE_8,
YP_PACK_SIZE_16,
YP_PACK_SIZE_32,
YP_PACK_SIZE_64,
YP_PACK_SIZE_P,
YP_PACK_SIZE_NA
} yp_pack_size;
// The abstract size of a directive's value. pm_size_to_native converts each of
// these into a byte count; the per-value notes below are what it returns.
typedef enum pm_pack_size {
PM_PACK_SIZE_SHORT, // sizeof(short)
PM_PACK_SIZE_INT, // sizeof(int)
PM_PACK_SIZE_LONG, // sizeof(long)
PM_PACK_SIZE_LONG_LONG, // sizeof(long long)
PM_PACK_SIZE_8, // 1 byte
PM_PACK_SIZE_16, // 2 bytes
PM_PACK_SIZE_32, // 4 bytes
PM_PACK_SIZE_64, // 8 bytes
PM_PACK_SIZE_P, // sizeof(void *)
PM_PACK_SIZE_NA // no size applies (strings, UTF-8, BER, moves)
} pm_pack_size;
typedef enum yp_pack_length_type {
YP_PACK_LENGTH_FIXED,
YP_PACK_LENGTH_MAX,
YP_PACK_LENGTH_RELATIVE, // special case for unpack @*
YP_PACK_LENGTH_NA
} yp_pack_length_type;
// How the length reported alongside a directive should be interpreted.
typedef enum pm_pack_length_type {
PM_PACK_LENGTH_FIXED, // an explicit numeric count; also chosen for '*' on some directives
PM_PACK_LENGTH_MAX, // what '*' means for most directives
PM_PACK_LENGTH_RELATIVE, // special case for unpack @*
PM_PACK_LENGTH_NA
} pm_pack_length_type;
typedef enum yp_pack_encoding {
YP_PACK_ENCODING_START,
YP_PACK_ENCODING_ASCII_8BIT,
YP_PACK_ENCODING_US_ASCII,
YP_PACK_ENCODING_UTF_8
} yp_pack_encoding;
// The encoding of the string that a format string would produce. It is
// threaded through successive calls to pm_pack_parse, which may change it.
typedef enum pm_pack_encoding {
PM_PACK_ENCODING_START, // the value to pass in for the first directive of a format string
PM_PACK_ENCODING_ASCII_8BIT, // binary; what most directives fall back to
PM_PACK_ENCODING_US_ASCII, // left unchanged by the MIME, base64 and UU string directives
PM_PACK_ENCODING_UTF_8 // what a UTF-8 directive upgrades US-ASCII to
} pm_pack_encoding;
typedef enum yp_pack_result {
YP_PACK_OK,
YP_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
YP_PACK_ERROR_UNKNOWN_DIRECTIVE,
YP_PACK_ERROR_LENGTH_TOO_BIG,
YP_PACK_ERROR_BANG_NOT_ALLOWED,
YP_PACK_ERROR_DOUBLE_ENDIAN
} yp_pack_result;
// The outcome of parsing a single directive with pm_pack_parse.
typedef enum pm_pack_result {
PM_PACK_OK,
PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE, // returned for the '%' directive
PM_PACK_ERROR_UNKNOWN_DIRECTIVE, // the directive character was not recognised
PM_PACK_ERROR_LENGTH_TOO_BIG, // the count could not be converted or exceeds UINT64_MAX
PM_PACK_ERROR_BANG_NOT_ALLOWED, // '_' or '!' followed a directive that does not accept it
PM_PACK_ERROR_DOUBLE_ENDIAN // more than one '<' or '>' modifier was given
} pm_pack_result;
// Parse a single directive from a pack or unpack format string.
//
// Parameters:
// - [in] yp_pack_version version the version of Ruby
// - [in] yp_pack_variant variant pack or unpack
// - [in] pm_pack_version version the version of Ruby
// - [in] pm_pack_variant variant pack or unpack
// - [in out] const char **format the start of the next directive to parse
// on calling, and advanced beyond the parsed directive on return, or as
// much of it as was consumed until an error was encountered
// - [in] const char *format_end the end of the format string
// - [out] yp_pack_type *type the type of the directive
// - [out] yp_pack_signed *signed_type
// - [out] pm_pack_type *type the type of the directive
// - [out] pm_pack_signed *signed_type
// whether the value is signed
// - [out] yp_pack_endian *endian the endianness of the value
// - [out] yp_pack_size *size the size of the value
// - [out] yp_pack_length_type *length_type
// - [out] pm_pack_endian *endian the endianness of the value
// - [out] pm_pack_size *size the size of the value
// - [out] pm_pack_length_type *length_type
// what kind of length is specified
// - [out] uint64_t *length the length of the directive
// - [in out] yp_pack_encoding *encoding
// - [in out] pm_pack_encoding *encoding
// takes the current encoding of the string
// which would result from parsing the whole format string, and returns a
// possibly changed encoding - the encoding should be
// YP_PACK_ENCODING_START when yp_pack_parse is called for the first
// PM_PACK_ENCODING_START when pm_pack_parse is called for the first
// directive in a format string
//
// Return:
// - YP_PACK_OK on success
// - YP_PACK_ERROR_* on error
// - PM_PACK_OK on success
// - PM_PACK_ERROR_* on error
//
// Notes:
// Consult Ruby documentation for the meaning of directives.
YP_EXPORTED_FUNCTION yp_pack_result
yp_pack_parse(
yp_pack_variant variant_arg,
PRISM_EXPORTED_FUNCTION pm_pack_result
pm_pack_parse(
pm_pack_variant variant_arg,
const char **format,
const char *format_end,
yp_pack_type *type,
yp_pack_signed *signed_type,
yp_pack_endian *endian,
yp_pack_size *size,
yp_pack_length_type *length_type,
pm_pack_type *type,
pm_pack_signed *signed_type,
pm_pack_endian *endian,
pm_pack_size *size,
pm_pack_length_type *length_type,
uint64_t *length,
yp_pack_encoding *encoding
pm_pack_encoding *encoding
);
// YARP abstracts sizes away from the native system - this converts an abstract
// prism abstracts sizes away from the native system - this converts an abstract
// size to a native size.
YP_EXPORTED_FUNCTION size_t yp_size_to_native(yp_pack_size size);
PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
#endif

Просмотреть файл

@ -1,13 +1,13 @@
#ifndef YARP_PARSER_H
#define YARP_PARSER_H
#ifndef PRISM_PARSER_H
#define PRISM_PARSER_H
#include "yarp/ast.h"
#include "yarp/defines.h"
#include "yarp/enc/yp_encoding.h"
#include "yarp/util/yp_constant_pool.h"
#include "yarp/util/yp_list.h"
#include "yarp/util/yp_newline_list.h"
#include "yarp/util/yp_state_stack.h"
#include "prism/ast.h"
#include "prism/defines.h"
#include "prism/enc/pm_encoding.h"
#include "prism/util/pm_constant_pool.h"
#include "prism/util/pm_list.h"
#include "prism/util/pm_newline_list.h"
#include "prism/util/pm_state_stack.h"
#include <stdbool.h>
@ -15,88 +15,88 @@
// the lexer can track. This is used to determine which kind of token to return
// based on the context of the parser.
typedef enum {
// Each enumerator is a bit index rather than a mask; the lex state enum
// shifts 1 left by these values to build the actual state flags.
YP_LEX_STATE_BIT_BEG,
YP_LEX_STATE_BIT_END,
YP_LEX_STATE_BIT_ENDARG,
YP_LEX_STATE_BIT_ENDFN,
YP_LEX_STATE_BIT_ARG,
YP_LEX_STATE_BIT_CMDARG,
YP_LEX_STATE_BIT_MID,
YP_LEX_STATE_BIT_FNAME,
YP_LEX_STATE_BIT_DOT,
YP_LEX_STATE_BIT_CLASS,
YP_LEX_STATE_BIT_LABEL,
YP_LEX_STATE_BIT_LABELED,
YP_LEX_STATE_BIT_FITEM
} yp_lex_state_bit_t;
PM_LEX_STATE_BIT_BEG,
PM_LEX_STATE_BIT_END,
PM_LEX_STATE_BIT_ENDARG,
PM_LEX_STATE_BIT_ENDFN,
PM_LEX_STATE_BIT_ARG,
PM_LEX_STATE_BIT_CMDARG,
PM_LEX_STATE_BIT_MID,
PM_LEX_STATE_BIT_FNAME,
PM_LEX_STATE_BIT_DOT,
PM_LEX_STATE_BIT_CLASS,
PM_LEX_STATE_BIT_LABEL,
PM_LEX_STATE_BIT_LABELED,
PM_LEX_STATE_BIT_FITEM
} pm_lex_state_bit_t;
// This enum combines the various bits from the above enum into individual
// values that represent the various states of the lexer.
typedef enum {
YP_LEX_STATE_NONE = 0,
YP_LEX_STATE_BEG = (1 << YP_LEX_STATE_BIT_BEG),
YP_LEX_STATE_END = (1 << YP_LEX_STATE_BIT_END),
YP_LEX_STATE_ENDARG = (1 << YP_LEX_STATE_BIT_ENDARG),
YP_LEX_STATE_ENDFN = (1 << YP_LEX_STATE_BIT_ENDFN),
YP_LEX_STATE_ARG = (1 << YP_LEX_STATE_BIT_ARG),
YP_LEX_STATE_CMDARG = (1 << YP_LEX_STATE_BIT_CMDARG),
YP_LEX_STATE_MID = (1 << YP_LEX_STATE_BIT_MID),
YP_LEX_STATE_FNAME = (1 << YP_LEX_STATE_BIT_FNAME),
YP_LEX_STATE_DOT = (1 << YP_LEX_STATE_BIT_DOT),
YP_LEX_STATE_CLASS = (1 << YP_LEX_STATE_BIT_CLASS),
YP_LEX_STATE_LABEL = (1 << YP_LEX_STATE_BIT_LABEL),
YP_LEX_STATE_LABELED = (1 << YP_LEX_STATE_BIT_LABELED),
YP_LEX_STATE_FITEM = (1 << YP_LEX_STATE_BIT_FITEM),
YP_LEX_STATE_BEG_ANY = YP_LEX_STATE_BEG | YP_LEX_STATE_MID | YP_LEX_STATE_CLASS,
YP_LEX_STATE_ARG_ANY = YP_LEX_STATE_ARG | YP_LEX_STATE_CMDARG,
YP_LEX_STATE_END_ANY = YP_LEX_STATE_END | YP_LEX_STATE_ENDARG | YP_LEX_STATE_ENDFN
} yp_lex_state_t;
PM_LEX_STATE_NONE = 0, // no state bits set
// One single-bit mask per entry in the bit-index enum.
PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
// Composite masks that group several of the single-bit states above.
PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
} pm_lex_state_t;
// The quoting style recorded for a heredoc. Apart from NONE, each enumerator's
// value is the quote character itself.
typedef enum {
YP_HEREDOC_QUOTE_NONE,
YP_HEREDOC_QUOTE_SINGLE = '\'',
YP_HEREDOC_QUOTE_DOUBLE = '"',
YP_HEREDOC_QUOTE_BACKTICK = '`',
} yp_heredoc_quote_t;
PM_HEREDOC_QUOTE_NONE,
PM_HEREDOC_QUOTE_SINGLE = '\'',
PM_HEREDOC_QUOTE_DOUBLE = '"',
PM_HEREDOC_QUOTE_BACKTICK = '`',
} pm_heredoc_quote_t;
// The indentation style recorded for a heredoc.
// NOTE(review): DASH and TILDE presumably correspond to the `<<-` and `<<~`
// heredoc forms - confirm against the lexer.
typedef enum {
YP_HEREDOC_INDENT_NONE,
YP_HEREDOC_INDENT_DASH,
YP_HEREDOC_INDENT_TILDE,
} yp_heredoc_indent_t;
PM_HEREDOC_INDENT_NONE,
PM_HEREDOC_INDENT_DASH,
PM_HEREDOC_INDENT_TILDE,
} pm_heredoc_indent_t;
// When lexing Ruby source, the lexer has a small amount of state to tell which
// kind of token it is currently lexing. For example, when we find the start of
// a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
// that the lexer is now in the YP_LEX_STRING mode, and will return tokens that
// that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
// are found as part of a string.
typedef struct yp_lex_mode {
typedef struct pm_lex_mode {
enum {
// This state is used when any given token is being lexed.
YP_LEX_DEFAULT,
PM_LEX_DEFAULT,
// This state is used when we're lexing as normal but inside an embedded
// expression of a string.
YP_LEX_EMBEXPR,
PM_LEX_EMBEXPR,
// This state is used when we're lexing a variable that is embedded
// directly inside of a string with the # shorthand.
YP_LEX_EMBVAR,
PM_LEX_EMBVAR,
// This state is used when you are inside the content of a heredoc.
YP_LEX_HEREDOC,
PM_LEX_HEREDOC,
// This state is used when we are lexing a list of tokens, as in a %w
// word list literal or a %i symbol list literal.
YP_LEX_LIST,
PM_LEX_LIST,
// This state is used when a regular expression has been begun and we
// are looking for the terminator.
YP_LEX_REGEXP,
PM_LEX_REGEXP,
// This state is used when we are lexing a string or a string-like
// token, as in string content with either quote or an xstring.
YP_LEX_STRING
PM_LEX_STRING
} mode;
union {
@ -166,8 +166,8 @@ typedef struct yp_lex_mode {
const uint8_t *ident_start;
size_t ident_length;
yp_heredoc_quote_t quote;
yp_heredoc_indent_t indent;
pm_heredoc_quote_t quote;
pm_heredoc_indent_t indent;
// This is the pointer to the character where lexing should resume
// once the heredoc has been completely processed.
@ -176,83 +176,83 @@ typedef struct yp_lex_mode {
} as;
// The previous lex state so that it knows how to pop.
struct yp_lex_mode *prev;
} yp_lex_mode_t;
struct pm_lex_mode *prev;
} pm_lex_mode_t;
// We pre-allocate a certain number of lex states in order to avoid having to
// call malloc too many times while parsing. You really shouldn't need more than
// this because you only really nest deeply when doing string interpolation.
#define YP_LEX_STACK_SIZE 4
#define PM_LEX_STACK_SIZE 4
// A forward declaration since our error handler struct accepts a parser for
// each of its function calls.
typedef struct yp_parser yp_parser_t;
typedef struct pm_parser pm_parser_t;
// While parsing, we keep track of a stack of contexts. This is helpful for
// error recovery so that we can pop back to a previous context when we hit a
// token that is understood by a parent context but not by the current context.
typedef enum {
YP_CONTEXT_BEGIN, // a begin statement
YP_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
YP_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
YP_CONTEXT_CASE_WHEN, // a case when statement
YP_CONTEXT_CASE_IN, // a case in statement
YP_CONTEXT_CLASS, // a class declaration
YP_CONTEXT_DEF, // a method definition
YP_CONTEXT_DEF_PARAMS, // a method definition's parameters
YP_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
YP_CONTEXT_ELSE, // an else clause
YP_CONTEXT_ELSIF, // an elsif clause
YP_CONTEXT_EMBEXPR, // an interpolated expression
YP_CONTEXT_ENSURE, // an ensure statement
YP_CONTEXT_FOR, // a for loop
YP_CONTEXT_IF, // an if statement
YP_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
YP_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
YP_CONTEXT_MAIN, // the top level context
YP_CONTEXT_MODULE, // a module declaration
YP_CONTEXT_PARENS, // a parenthesized expression
YP_CONTEXT_POSTEXE, // an END block
YP_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
YP_CONTEXT_PREEXE, // a BEGIN block
YP_CONTEXT_RESCUE_ELSE, // a rescue else statement
YP_CONTEXT_RESCUE, // a rescue statement
YP_CONTEXT_SCLASS, // a singleton class definition
YP_CONTEXT_UNLESS, // an unless statement
YP_CONTEXT_UNTIL, // an until statement
YP_CONTEXT_WHILE, // a while statement
} yp_context_t;
PM_CONTEXT_BEGIN, // a begin statement
PM_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
PM_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
PM_CONTEXT_CASE_WHEN, // a case when statement
PM_CONTEXT_CASE_IN, // a case in statement
PM_CONTEXT_CLASS, // a class declaration
PM_CONTEXT_DEF, // a method definition
PM_CONTEXT_DEF_PARAMS, // a method definition's parameters
PM_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
PM_CONTEXT_ELSE, // an else clause
PM_CONTEXT_ELSIF, // an elsif clause
PM_CONTEXT_EMBEXPR, // an interpolated expression
PM_CONTEXT_ENSURE, // an ensure statement
PM_CONTEXT_FOR, // a for loop
PM_CONTEXT_IF, // an if statement
PM_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
PM_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
PM_CONTEXT_MAIN, // the top level context
PM_CONTEXT_MODULE, // a module declaration
PM_CONTEXT_PARENS, // a parenthesized expression
PM_CONTEXT_POSTEXE, // an END block
PM_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
PM_CONTEXT_PREEXE, // a BEGIN block
PM_CONTEXT_RESCUE_ELSE, // a rescue else statement
PM_CONTEXT_RESCUE, // a rescue statement
PM_CONTEXT_SCLASS, // a singleton class definition
PM_CONTEXT_UNLESS, // an unless statement
PM_CONTEXT_UNTIL, // an until statement
PM_CONTEXT_WHILE, // a while statement
} pm_context_t;
// This is a node in a linked list of contexts.
typedef struct yp_context_node {
yp_context_t context;
struct yp_context_node *prev;
} yp_context_node_t;
// A node in the linked list of parsing contexts.
typedef struct pm_context_node {
pm_context_t context; // the context recorded by this node
struct pm_context_node *prev; // the previous node in the list
} pm_context_node_t;
// This is the type of a comment that we've found while parsing.
typedef enum {
YP_COMMENT_INLINE,
YP_COMMENT_EMBDOC,
YP_COMMENT___END__
} yp_comment_type_t;
PM_COMMENT_INLINE, // presumably a `#` comment - confirm against the lexer
PM_COMMENT_EMBDOC, // presumably an `=begin`/`=end` block - confirm against the lexer
PM_COMMENT___END__ // presumably the content after `__END__` - confirm against the lexer
} pm_comment_type_t;
// This is a node in the linked list of comments that we've found while parsing.
typedef struct yp_comment {
yp_list_node_t node;
typedef struct pm_comment {
pm_list_node_t node;
const uint8_t *start;
const uint8_t *end;
yp_comment_type_t type;
} yp_comment_t;
pm_comment_type_t type;
} pm_comment_t;
// When the encoding that is being used to parse the source is changed by YARP,
// When the encoding that is being used to parse the source is changed by prism,
// we provide the ability here to call out to a user-defined function.
typedef void (*yp_encoding_changed_callback_t)(yp_parser_t *parser);
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
// When an encoding is encountered that isn't understood by YARP, we provide
// When an encoding is encountered that isn't understood by prism, we provide
// the ability here to call out to a user-defined function to get an encoding
// struct. If the function returns something that isn't NULL, we set that to
// our encoding and use it to parse identifiers.
typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
// When you are lexing through a file, the lexer needs all of the information
// that the parser additionally provides (for example, the local table). So if
@ -268,17 +268,17 @@ typedef struct {
// This is the callback that is called when a token is lexed. It is passed
// the opaque data pointer, the parser, and the token that was lexed.
void (*callback)(void *data, yp_parser_t *parser, yp_token_t *token);
} yp_lex_callback_t;
void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
} pm_lex_callback_t;
// This struct represents a node in a linked list of scopes. Some scopes can see
// into their parent scopes, while others cannot.
typedef struct yp_scope {
typedef struct pm_scope {
// The IDs of the locals in the given scope.
yp_constant_id_list_t locals;
pm_constant_id_list_t locals;
// A pointer to the previous scope in the linked list.
struct yp_scope *previous;
struct pm_scope *previous;
// A boolean indicating whether or not this scope can see into its parent.
// If closed is true, then the scope cannot see into its parent.
@ -293,14 +293,14 @@ typedef struct yp_scope {
// This is necessary to determine if child blocks are allowed to use
// numbered parameters.
bool numbered_params;
} yp_scope_t;
} pm_scope_t;
// This struct represents the overall parser. It contains a reference to the
// source file, as well as pointers that indicate where in the source it's
// currently parsing. It also contains the most recent and current token that
// it's considering.
struct yp_parser {
yp_lex_state_t lex_state; // the current state of the lexer
struct pm_parser {
pm_lex_state_t lex_state; // the current state of the lexer
int enclosure_nesting; // tracks the current nesting of (), [], and {}
// Used to temporarily track the nesting of enclosures to determine if a {
@ -313,22 +313,22 @@ struct yp_parser {
// the stack used to determine if a do keyword belongs to the predicate of a
// while, until, or for loop
yp_state_stack_t do_loop_stack;
pm_state_stack_t do_loop_stack;
// the stack used to determine if a do keyword belongs to the beginning of a
// block
yp_state_stack_t accepts_block_stack;
pm_state_stack_t accepts_block_stack;
struct {
yp_lex_mode_t *current; // the current mode of the lexer
yp_lex_mode_t stack[YP_LEX_STACK_SIZE]; // the stack of lexer modes
pm_lex_mode_t *current; // the current mode of the lexer
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; // the stack of lexer modes
size_t index; // the current index into the lexer mode stack
} lex_modes;
const uint8_t *start; // the pointer to the start of the source
const uint8_t *end; // the pointer to the end of the source
yp_token_t previous; // the previous token we were considering
yp_token_t current; // the current token we're considering
pm_token_t previous; // the previous token we were considering
pm_token_t current; // the current token we're considering
// This is a special field set on the parser when we need the parser to jump
// to a specific location when lexing the next token, as opposed to just
@ -341,26 +341,27 @@ struct yp_parser {
// found on a line then this is NULL.
const uint8_t *heredoc_end;
yp_list_t comment_list; // the list of comments that have been found while parsing
yp_list_t warning_list; // the list of warnings that have been found while parsing
yp_list_t error_list; // the list of errors that have been found while parsing
yp_scope_t *current_scope; // the current local scope
pm_list_t comment_list; // the list of comments that have been found while parsing
pm_list_t warning_list; // the list of warnings that have been found while parsing
pm_list_t error_list; // the list of errors that have been found while parsing
pm_scope_t *current_scope; // the current local scope
yp_context_node_t *current_context; // the current parsing context
pm_context_node_t *current_context; // the current parsing context
// The encoding functions for the current file are attached to the parser as
// it's parsing so that they can change with a magic comment.
yp_encoding_t encoding;
pm_encoding_t encoding;
// When the encoding that is being used to parse the source is changed by
// YARP, we provide the ability here to call out to a user-defined function.
yp_encoding_changed_callback_t encoding_changed_callback;
// prism, we provide the ability here to call out to a user-defined
// function.
pm_encoding_changed_callback_t encoding_changed_callback;
// When an encoding is encountered that isn't understood by YARP, we provide
// the ability here to call out to a user-defined function to get an
// When an encoding is encountered that isn't understood by prism, we
// provide the ability here to call out to a user-defined function to get an
// encoding struct. If the function returns something that isn't NULL, we
// set that to our encoding and use it to parse identifiers.
yp_encoding_decode_callback_t encoding_decode_callback;
pm_encoding_decode_callback_t encoding_decode_callback;
// This pointer indicates where a comment must start if it is to be
// considered an encoding comment.
@ -368,24 +369,24 @@ struct yp_parser {
// This is an optional callback that can be attached to the parser that will
// be called whenever a new token is lexed by the parser.
yp_lex_callback_t *lex_callback;
pm_lex_callback_t *lex_callback;
// This is the path of the file being parsed.
// We use the filepath when constructing SourceFileNodes.
yp_string_t filepath_string;
pm_string_t filepath_string;
// This constant pool keeps all of the constants defined throughout the file
// so that we can reference them later.
yp_constant_pool_t constant_pool;
pm_constant_pool_t constant_pool;
// This is the list of newline offsets in the source file.
yp_newline_list_t newline_list;
pm_newline_list_t newline_list;
// We want to add a flag to integer nodes that indicates their base. We only
// want to parse these once, but we don't have space on the token itself to
// communicate this information. So we store it here and pass it through
// when we find tokens that we need it for.
yp_node_flags_t integer_base;
pm_node_flags_t integer_base;
// Whether or not we're at the beginning of a command
bool command_start;
@ -414,4 +415,4 @@ struct yp_parser {
bool frozen_string_literal;
};
#endif // YARP_PARSER_H
#endif // PRISM_PARSER_H

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,19 +1,19 @@
#ifndef YARP_H
#define YARP_H
#ifndef PRISM_H
#define PRISM_H
#include "yarp/defines.h"
#include "yarp/ast.h"
#include "yarp/diagnostic.h"
#include "yarp/node.h"
#include "yarp/pack.h"
#include "yarp/parser.h"
#include "yarp/regexp.h"
#include "yarp/unescape.h"
#include "yarp/util/yp_buffer.h"
#include "yarp/util/yp_char.h"
#include "yarp/util/yp_memchr.h"
#include "yarp/util/yp_strpbrk.h"
#include "yarp/version.h"
#include "prism/defines.h"
#include "prism/ast.h"
#include "prism/diagnostic.h"
#include "prism/node.h"
#include "prism/pack.h"
#include "prism/parser.h"
#include "prism/regexp.h"
#include "prism/unescape.h"
#include "prism/util/pm_buffer.h"
#include "prism/util/pm_char.h"
#include "prism/util/pm_memchr.h"
#include "prism/util/pm_strpbrk.h"
#include "prism/version.h"
#include <assert.h>
#include <errno.h>
@ -28,55 +28,55 @@
#include <strings.h>
#endif
void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
void yp_print_node(yp_parser_t *parser, yp_node_t *node);
void pm_print_node(pm_parser_t *parser, pm_node_t *node);
void yp_parser_metadata(yp_parser_t *parser, const char *metadata);
void pm_parser_metadata(pm_parser_t *parser, const char *metadata);
// Generate a scope node from the given node.
void yp_scope_node_init(yp_node_t *node, yp_scope_node_t *dest);
void pm_scope_node_init(pm_node_t *node, pm_scope_node_t *dest);
// The YARP version and the serialization format.
YP_EXPORTED_FUNCTION const char * yp_version(void);
// The prism version and the serialization format.
PRISM_EXPORTED_FUNCTION const char * pm_version(void);
// Initialize a parser with the given source, its size, and the file path.
YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath);
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const char *filepath);
// Register a callback that will be called whenever YARP changes the encoding it
// Register a callback that will be called whenever prism changes the encoding it
// is using to parse based on the magic comment.
YP_EXPORTED_FUNCTION void yp_parser_register_encoding_changed_callback(yp_parser_t *parser, yp_encoding_changed_callback_t callback);
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
// Register a callback that will be called when YARP encounters a magic comment
// Register a callback that will be called when prism encounters a magic comment
// with an encoding referenced that it doesn't understand. The callback should
// return NULL if it also doesn't understand the encoding or it should return a
// pointer to a yp_encoding_t struct that contains the functions necessary to
// pointer to a pm_encoding_t struct that contains the functions necessary to
// parse identifiers.
YP_EXPORTED_FUNCTION void yp_parser_register_encoding_decode_callback(yp_parser_t *parser, yp_encoding_decode_callback_t callback);
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback);
// Free any memory associated with the given parser.
YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
// Parse the Ruby source associated with the given parser and return the tree.
YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
// Pretty-prints the AST represented by the given node to the given buffer.
YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
// Serialize the AST represented by the given node to the given buffer.
YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
// Parse the given source to the AST and serialize the AST to the given buffer.
YP_EXPORTED_FUNCTION void yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata);
PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
// Lex the given source and serialize to the given buffer.
YP_EXPORTED_FUNCTION void yp_lex_serialize(const uint8_t *source, size_t size, const char *filepath, yp_buffer_t *buffer);
PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer);
// Parse and serialize both the AST and the tokens represented by the given
// source to the given buffer.
YP_EXPORTED_FUNCTION void yp_parse_lex_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata);
PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
// Returns a string representation of the given token type.
YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
#endif

Просмотреть файл

@ -1,19 +1,19 @@
#include "yarp/regexp.h"
#include "prism/regexp.h"
// This is the parser that is going to handle parsing regular expressions.
typedef struct {
const uint8_t *start;
const uint8_t *cursor;
const uint8_t *end;
yp_string_list_t *named_captures;
pm_string_list_t *named_captures;
bool encoding_changed;
yp_encoding_t *encoding;
} yp_regexp_parser_t;
pm_encoding_t *encoding;
} pm_regexp_parser_t;
// This initializes a new parser with the given source.
static void
yp_regexp_parser_init(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
*parser = (yp_regexp_parser_t) {
pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
*parser = (pm_regexp_parser_t) {
.start = start,
.cursor = start,
.end = end,
@ -25,23 +25,23 @@ yp_regexp_parser_init(yp_regexp_parser_t *parser, const uint8_t *start, const ui
// This appends a new string to the list of named captures.
static void
yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
yp_string_t string;
yp_string_shared_init(&string, start, end);
yp_string_list_append(parser->named_captures, &string);
yp_string_free(&string);
pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
pm_string_t string;
pm_string_shared_init(&string, start, end);
pm_string_list_append(parser->named_captures, &string);
pm_string_free(&string);
}
// Returns true if the next character is the end of the source.
static inline bool
yp_regexp_char_is_eof(yp_regexp_parser_t *parser) {
pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
return parser->cursor >= parser->end;
}
// Optionally accept a char and consume it if it exists.
static inline bool
yp_regexp_char_accept(yp_regexp_parser_t *parser, uint8_t value) {
if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
parser->cursor++;
return true;
}
@ -50,8 +50,8 @@ yp_regexp_char_accept(yp_regexp_parser_t *parser, uint8_t value) {
// Expect a character to be present and consume it.
static inline bool
yp_regexp_char_expect(yp_regexp_parser_t *parser, uint8_t value) {
if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
parser->cursor++;
return true;
}
@ -60,12 +60,12 @@ yp_regexp_char_expect(yp_regexp_parser_t *parser, uint8_t value) {
// This advances the cursor to the next instance of the given character.
static bool
yp_regexp_char_find(yp_regexp_parser_t *parser, uint8_t value) {
if (yp_regexp_char_is_eof(parser)) {
pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
if (pm_regexp_char_is_eof(parser)) {
return false;
}
const uint8_t *end = (const uint8_t *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
const uint8_t *end = (const uint8_t *) pm_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
if (end == NULL) {
return false;
}
@ -106,41 +106,41 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, uint8_t value) {
// Note that by the time we've hit this function, the lbrace has already been
// consumed so we're in the start state.
static bool
yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
const uint8_t *savepoint = parser->cursor;
enum {
YP_REGEXP_RANGE_QUANTIFIER_STATE_START,
YP_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM,
YP_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM,
YP_REGEXP_RANGE_QUANTIFIER_STATE_COMMA
} state = YP_REGEXP_RANGE_QUANTIFIER_STATE_START;
PM_REGEXP_RANGE_QUANTIFIER_STATE_START,
PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM,
PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM,
PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA
} state = PM_REGEXP_RANGE_QUANTIFIER_STATE_START;
while (1) {
switch (state) {
case YP_REGEXP_RANGE_QUANTIFIER_STATE_START:
case PM_REGEXP_RANGE_QUANTIFIER_STATE_START:
switch (*parser->cursor) {
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
parser->cursor++;
state = YP_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM;
state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM;
break;
case ',':
parser->cursor++;
state = YP_REGEXP_RANGE_QUANTIFIER_STATE_COMMA;
state = PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA;
break;
default:
parser->cursor = savepoint;
return true;
}
break;
case YP_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM:
case PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM:
switch (*parser->cursor) {
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
parser->cursor++;
break;
case ',':
parser->cursor++;
state = YP_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
break;
case '}':
parser->cursor++;
@ -150,18 +150,18 @@ yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
return true;
}
break;
case YP_REGEXP_RANGE_QUANTIFIER_STATE_COMMA:
case PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA:
switch (*parser->cursor) {
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
parser->cursor++;
state = YP_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
break;
default:
parser->cursor = savepoint;
return true;
}
break;
case YP_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM:
case PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM:
switch (*parser->cursor) {
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
parser->cursor++;
@ -187,7 +187,7 @@ yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
// | <empty>
// ;
static bool
yp_regexp_parse_quantifier(yp_regexp_parser_t *parser) {
pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
switch (*parser->cursor) {
case '*':
case '+':
@ -196,7 +196,7 @@ yp_regexp_parse_quantifier(yp_regexp_parser_t *parser) {
return true;
case '{':
parser->cursor++;
return yp_regexp_parse_range_quantifier(parser);
return pm_regexp_parse_range_quantifier(parser);
default:
// In this case there is no quantifier.
return true;
@ -206,37 +206,37 @@ yp_regexp_parse_quantifier(yp_regexp_parser_t *parser) {
// match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
// ;
static bool
yp_regexp_parse_posix_class(yp_regexp_parser_t *parser) {
if (!yp_regexp_char_expect(parser, ':')) {
pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
if (!pm_regexp_char_expect(parser, ':')) {
return false;
}
yp_regexp_char_accept(parser, '^');
pm_regexp_char_accept(parser, '^');
return (
yp_regexp_char_find(parser, ':') &&
yp_regexp_char_expect(parser, ']') &&
yp_regexp_char_expect(parser, ']')
pm_regexp_char_find(parser, ':') &&
pm_regexp_char_expect(parser, ']') &&
pm_regexp_char_expect(parser, ']')
);
}
// Forward declaration because character sets can be nested.
static bool
yp_regexp_parse_lbracket(yp_regexp_parser_t *parser);
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
// match-char-set : '[' '^'? (match-range | match-char)* ']'
// ;
static bool
yp_regexp_parse_character_set(yp_regexp_parser_t *parser) {
yp_regexp_char_accept(parser, '^');
pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
pm_regexp_char_accept(parser, '^');
while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ']') {
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ']') {
switch (*parser->cursor++) {
case '[':
yp_regexp_parse_lbracket(parser);
pm_regexp_parse_lbracket(parser);
break;
case '\\':
if (!yp_regexp_char_is_eof(parser)) {
if (!pm_regexp_char_is_eof(parser)) {
parser->cursor++;
}
break;
@ -246,78 +246,78 @@ yp_regexp_parse_character_set(yp_regexp_parser_t *parser) {
}
}
return yp_regexp_char_expect(parser, ']');
return pm_regexp_char_expect(parser, ']');
}
// A left bracket can either mean a POSIX class or a character set.
static bool
yp_regexp_parse_lbracket(yp_regexp_parser_t *parser) {
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
const uint8_t *reset = parser->cursor;
if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
parser->cursor++;
if (yp_regexp_parse_posix_class(parser)) return true;
if (pm_regexp_parse_posix_class(parser)) return true;
parser->cursor = reset;
}
return yp_regexp_parse_character_set(parser);
return pm_regexp_parse_character_set(parser);
}
// Forward declaration here since parsing groups needs to go back up the grammar
// to parse expressions within them.
static bool
yp_regexp_parse_expression(yp_regexp_parser_t *parser);
pm_regexp_parse_expression(pm_regexp_parser_t *parser);
// These are the states of the options that are configurable on the regular
// expression (or from within a group).
typedef enum {
YP_REGEXP_OPTION_STATE_INVALID,
YP_REGEXP_OPTION_STATE_TOGGLEABLE,
YP_REGEXP_OPTION_STATE_ADDABLE,
YP_REGEXP_OPTION_STATE_ADDED,
YP_REGEXP_OPTION_STATE_REMOVED
} yp_regexp_option_state_t;
PM_REGEXP_OPTION_STATE_INVALID,
PM_REGEXP_OPTION_STATE_TOGGLEABLE,
PM_REGEXP_OPTION_STATE_ADDABLE,
PM_REGEXP_OPTION_STATE_ADDED,
PM_REGEXP_OPTION_STATE_REMOVED
} pm_regexp_option_state_t;
// These are the options that are configurable on the regular expression (or
// from within a group).
#define YP_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
#define YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
#define YP_REGEXP_OPTION_STATE_SLOTS (YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
#define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
#define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
#define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
// This is the set of options that are configurable on the regular expression.
typedef struct {
uint8_t values[YP_REGEXP_OPTION_STATE_SLOTS];
} yp_regexp_options_t;
uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
} pm_regexp_options_t;
// Initialize a new set of options to their default values.
static void
yp_regexp_options_init(yp_regexp_options_t *options) {
memset(options, YP_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * YP_REGEXP_OPTION_STATE_SLOTS);
options->values['i' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_TOGGLEABLE;
options->values['m' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_TOGGLEABLE;
options->values['x' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_TOGGLEABLE;
options->values['d' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_ADDABLE;
options->values['a' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_ADDABLE;
options->values['u' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_ADDABLE;
pm_regexp_options_init(pm_regexp_options_t *options) {
memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
options->values['i' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
options->values['m' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
options->values['x' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
options->values['d' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
options->values['a' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
}
// Attempt to add the given option to the set of options. Returns true if the
// option was added or had already been added, false if it is invalid or was
// previously removed.
static bool
yp_regexp_options_add(yp_regexp_options_t *options, uint8_t key) {
if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
switch (options->values[key]) {
case YP_REGEXP_OPTION_STATE_INVALID:
case YP_REGEXP_OPTION_STATE_REMOVED:
case PM_REGEXP_OPTION_STATE_INVALID:
case PM_REGEXP_OPTION_STATE_REMOVED:
return false;
case YP_REGEXP_OPTION_STATE_TOGGLEABLE:
case YP_REGEXP_OPTION_STATE_ADDABLE:
options->values[key] = YP_REGEXP_OPTION_STATE_ADDED;
case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
case PM_REGEXP_OPTION_STATE_ADDABLE:
options->values[key] = PM_REGEXP_OPTION_STATE_ADDED;
return true;
case YP_REGEXP_OPTION_STATE_ADDED:
case PM_REGEXP_OPTION_STATE_ADDED:
return true;
}
}
@ -328,18 +328,18 @@ yp_regexp_options_add(yp_regexp_options_t *options, uint8_t key) {
// Attempt to remove the given option from the set of options. Returns true if
// it was removed (or had already been removed), false if the option is invalid
// or is still in the add-only (ADDABLE) state.
static bool
yp_regexp_options_remove(yp_regexp_options_t *options, uint8_t key) {
if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
switch (options->values[key]) {
case YP_REGEXP_OPTION_STATE_INVALID:
case YP_REGEXP_OPTION_STATE_ADDABLE:
case PM_REGEXP_OPTION_STATE_INVALID:
case PM_REGEXP_OPTION_STATE_ADDABLE:
return false;
case YP_REGEXP_OPTION_STATE_TOGGLEABLE:
case YP_REGEXP_OPTION_STATE_ADDED:
case YP_REGEXP_OPTION_STATE_REMOVED:
options->values[key] = YP_REGEXP_OPTION_STATE_REMOVED;
case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
case PM_REGEXP_OPTION_STATE_ADDED:
case PM_REGEXP_OPTION_STATE_REMOVED:
options->values[key] = PM_REGEXP_OPTION_STATE_REMOVED;
return true;
}
}
@ -368,14 +368,14 @@ yp_regexp_options_remove(yp_regexp_options_t *options, uint8_t key) {
// * (?imxdau-imx:subexp) - turn on and off configuration for an expression
//
static bool
yp_regexp_parse_group(yp_regexp_parser_t *parser) {
pm_regexp_parse_group(pm_regexp_parser_t *parser) {
// First, parse any options for the group.
if (yp_regexp_char_accept(parser, '?')) {
if (yp_regexp_char_is_eof(parser)) {
if (pm_regexp_char_accept(parser, '?')) {
if (pm_regexp_char_is_eof(parser)) {
return false;
}
yp_regexp_options_t options;
yp_regexp_options_init(&options);
pm_regexp_options_t options;
pm_regexp_options_init(&options);
switch (*parser->cursor) {
case '#': { // inline comments
@ -403,10 +403,10 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
// Here we can take the fast path and use memchr to find the
// next ) because we are safe checking backward for \ since
// it cannot be a trailing character.
bool found = yp_regexp_char_find(parser, ')');
bool found = pm_regexp_char_find(parser, ')');
while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
found = yp_regexp_char_find(parser, ')');
found = pm_regexp_char_find(parser, ')');
}
return found;
@ -421,7 +421,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
break;
case '<':
parser->cursor++;
if (yp_regexp_char_is_eof(parser)) {
if (pm_regexp_char_is_eof(parser)) {
return false;
}
@ -432,37 +432,37 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
break;
default: { // named capture group
const uint8_t *start = parser->cursor;
if (!yp_regexp_char_find(parser, '>')) {
if (!pm_regexp_char_find(parser, '>')) {
return false;
}
yp_regexp_parser_named_capture(parser, start, parser->cursor - 1);
pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
break;
}
}
break;
case '\'': { // named capture group
const uint8_t *start = ++parser->cursor;
if (!yp_regexp_char_find(parser, '\'')) {
if (!pm_regexp_char_find(parser, '\'')) {
return false;
}
yp_regexp_parser_named_capture(parser, start, parser->cursor - 1);
pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
break;
}
case '(': // conditional expression
if (!yp_regexp_char_find(parser, ')')) {
if (!pm_regexp_char_find(parser, ')')) {
return false;
}
break;
case 'i': case 'm': case 'x': case 'd': case 'a': case 'u': // options
while (!yp_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
if (!yp_regexp_options_add(&options, *parser->cursor)) {
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
if (!pm_regexp_options_add(&options, *parser->cursor)) {
return false;
}
parser->cursor++;
}
if (yp_regexp_char_is_eof(parser)) {
if (pm_regexp_char_is_eof(parser)) {
return false;
}
@ -473,14 +473,14 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
/* fallthrough */
case '-':
parser->cursor++;
while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
if (!yp_regexp_options_remove(&options, *parser->cursor)) {
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
if (!pm_regexp_options_remove(&options, *parser->cursor)) {
return false;
}
parser->cursor++;
}
if (yp_regexp_char_is_eof(parser)) {
if (pm_regexp_char_is_eof(parser)) {
return false;
}
break;
@ -490,15 +490,15 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
}
// Now, parse the expressions within this group.
while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ')') {
if (!yp_regexp_parse_expression(parser)) {
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
if (!pm_regexp_parse_expression(parser)) {
return false;
}
yp_regexp_char_accept(parser, '|');
pm_regexp_char_accept(parser, '|');
}
// Finally, make sure we have a closing parenthesis.
return yp_regexp_char_expect(parser, ')');
return pm_regexp_char_expect(parser, ')');
}
// item : anchor
@ -512,35 +512,35 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
// | quantified
// ;
static bool
yp_regexp_parse_item(yp_regexp_parser_t *parser) {
pm_regexp_parse_item(pm_regexp_parser_t *parser) {
switch (*parser->cursor++) {
case '^':
case '$':
return true;
case '\\':
if (!yp_regexp_char_is_eof(parser)) {
if (!pm_regexp_char_is_eof(parser)) {
parser->cursor++;
}
return yp_regexp_parse_quantifier(parser);
return pm_regexp_parse_quantifier(parser);
case '(':
return yp_regexp_parse_group(parser) && yp_regexp_parse_quantifier(parser);
return pm_regexp_parse_group(parser) && pm_regexp_parse_quantifier(parser);
case '[':
return yp_regexp_parse_lbracket(parser) && yp_regexp_parse_quantifier(parser);
return pm_regexp_parse_lbracket(parser) && pm_regexp_parse_quantifier(parser);
default:
return yp_regexp_parse_quantifier(parser);
return pm_regexp_parse_quantifier(parser);
}
}
// expression : item+
// ;
static bool
yp_regexp_parse_expression(yp_regexp_parser_t *parser) {
if (!yp_regexp_parse_item(parser)) {
pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
if (!pm_regexp_parse_item(parser)) {
return false;
}
while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ')' && *parser->cursor != '|') {
if (!yp_regexp_parse_item(parser)) {
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')' && *parser->cursor != '|') {
if (!pm_regexp_parse_item(parser)) {
return false;
}
}
@ -553,28 +553,28 @@ yp_regexp_parse_expression(yp_regexp_parser_t *parser) {
// | expression '|' pattern
// ;
static bool
yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
return (
(
// Exit early if the pattern is empty.
yp_regexp_char_is_eof(parser) ||
pm_regexp_char_is_eof(parser) ||
// Parse the first expression in the pattern.
yp_regexp_parse_expression(parser)
pm_regexp_parse_expression(parser)
) &&
(
// Return now if we've parsed the entire pattern.
yp_regexp_char_is_eof(parser) ||
pm_regexp_char_is_eof(parser) ||
// Otherwise, we should have a pipe character.
(yp_regexp_char_expect(parser, '|') && yp_regexp_parse_pattern(parser))
(pm_regexp_char_expect(parser, '|') && pm_regexp_parse_pattern(parser))
)
);
}
// Parse a regular expression and extract the names of all of the named capture
// groups.
YP_EXPORTED_FUNCTION bool
yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
yp_regexp_parser_t parser;
yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
return yp_regexp_parse_pattern(&parser);
PRISM_EXPORTED_FUNCTION bool
pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
pm_regexp_parser_t parser;
pm_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
return pm_regexp_parse_pattern(&parser);
}

Просмотреть файл

@ -1,12 +1,12 @@
#ifndef YARP_REGEXP_H
#define YARP_REGEXP_H
#ifndef PRISM_REGEXP_H
#define PRISM_REGEXP_H
#include "yarp/defines.h"
#include "yarp/parser.h"
#include "yarp/enc/yp_encoding.h"
#include "yarp/util/yp_memchr.h"
#include "yarp/util/yp_string_list.h"
#include "yarp/util/yp_string.h"
#include "prism/defines.h"
#include "prism/parser.h"
#include "prism/enc/pm_encoding.h"
#include "prism/util/pm_memchr.h"
#include "prism/util/pm_string_list.h"
#include "prism/util/pm_string.h"
#include <stdbool.h>
#include <stddef.h>
@ -14,6 +14,6 @@
// Parse a regular expression and extract the names of all of the named capture
// groups.
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding);
PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
#endif

Просмотреть файл

@ -1,26 +1,26 @@
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
#include "yarp/extension.h"
#include "prism/extension.h"
extern VALUE rb_cYARP;
extern VALUE rb_cYARPNode;
extern VALUE rb_cYARPSource;
extern VALUE rb_cYARPToken;
extern VALUE rb_cYARPLocation;
extern VALUE rb_cPrism;
extern VALUE rb_cPrismNode;
extern VALUE rb_cPrismSource;
extern VALUE rb_cPrismToken;
extern VALUE rb_cPrismLocation;
<%- nodes.each do |node| -%>
static VALUE rb_cYARP<%= node.name %>;
static VALUE rb_cPrism<%= node.name %>;
<%- end -%>
static VALUE
yp_location_new(yp_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source) {
pm_location_new(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source) {
VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(end - start) };
return rb_class_new_instance(3, argv, rb_cYARPLocation);
return rb_class_new_instance(3, argv, rb_cPrismLocation);
}
VALUE
yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source) {
ID type = rb_intern(yp_token_type_to_str(token->type));
VALUE location = yp_location_new(parser, token->start, token->end, source);
pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source) {
ID type = rb_intern(pm_token_type_to_str(token->type));
VALUE location = pm_location_new(parser, token->start, token->end, source);
VALUE argv[] = {
ID2SYM(type),
@ -28,17 +28,17 @@ yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALU
location
};
return rb_class_new_instance(3, argv, rb_cYARPToken);
return rb_class_new_instance(3, argv, rb_cPrismToken);
}
static VALUE
yp_string_new(yp_string_t *string, rb_encoding *encoding) {
return rb_enc_str_new((const char *) yp_string_source(string), yp_string_length(string), encoding);
pm_string_new(pm_string_t *string, rb_encoding *encoding) {
return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding);
}
// Create a YARP::Source object from the given parser.
// Create a Prism::Source object from the given parser.
VALUE
yp_source_new(yp_parser_t *parser, rb_encoding *encoding) {
pm_source_new(pm_parser_t *parser, rb_encoding *encoding) {
VALUE source = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding);
VALUE offsets = rb_ary_new_capa(parser->newline_list.size);
@ -47,28 +47,28 @@ yp_source_new(yp_parser_t *parser, rb_encoding *encoding) {
}
VALUE source_argv[] = { source, offsets };
return rb_class_new_instance(2, source_argv, rb_cYARPSource);
return rb_class_new_instance(2, source_argv, rb_cPrismSource);
}
typedef struct yp_node_stack_node {
struct yp_node_stack_node *prev;
yp_node_t *visit;
typedef struct pm_node_stack_node {
struct pm_node_stack_node *prev;
pm_node_t *visit;
bool visited;
} yp_node_stack_node_t;
} pm_node_stack_node_t;
static void
yp_node_stack_push(yp_node_stack_node_t **stack, yp_node_t *visit) {
yp_node_stack_node_t *node = malloc(sizeof(yp_node_stack_node_t));
pm_node_stack_push(pm_node_stack_node_t **stack, pm_node_t *visit) {
pm_node_stack_node_t *node = malloc(sizeof(pm_node_stack_node_t));
node->prev = *stack;
node->visit = visit;
node->visited = false;
*stack = node;
}
static yp_node_t *
yp_node_stack_pop(yp_node_stack_node_t **stack) {
yp_node_stack_node_t *current = *stack;
yp_node_t *visit = current->visit;
static pm_node_t *
pm_node_stack_pop(pm_node_stack_node_t **stack) {
pm_node_stack_node_t *current = *stack;
pm_node_t *visit = current->visit;
*stack = current->prev;
free(current);
@ -77,46 +77,46 @@ yp_node_stack_pop(yp_node_stack_node_t **stack) {
}
VALUE
yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
VALUE source = yp_source_new(parser, encoding);
pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
VALUE source = pm_source_new(parser, encoding);
ID *constants = calloc(parser->constant_pool.size, sizeof(ID));
for (uint32_t index = 0; index < parser->constant_pool.capacity; index++) {
yp_constant_t constant = parser->constant_pool.constants[index];
pm_constant_t constant = parser->constant_pool.constants[index];
if (constant.id != 0) {
constants[constant.id - 1] = rb_intern3((const char *) constant.start, constant.length, encoding);
}
}
yp_node_stack_node_t *node_stack = NULL;
yp_node_stack_push(&node_stack, node);
pm_node_stack_node_t *node_stack = NULL;
pm_node_stack_push(&node_stack, node);
VALUE value_stack = rb_ary_new();
while (node_stack != NULL) {
if (!node_stack->visited) {
if (node_stack->visit == NULL) {
yp_node_stack_pop(&node_stack);
pm_node_stack_pop(&node_stack);
rb_ary_push(value_stack, Qnil);
continue;
}
yp_node_t *node = node_stack->visit;
pm_node_t *node = node_stack->visit;
node_stack->visited = true;
switch (YP_NODE_TYPE(node)) {
switch (PM_NODE_TYPE(node)) {
<%- nodes.each do |node| -%>
<%- if node.fields.any? { |field| [YARP::NodeField, YARP::OptionalNodeField, YARP::NodeListField].include?(field.class) } -%>
<%- if node.fields.any? { |field| [Prism::NodeField, Prism::OptionalNodeField, Prism::NodeListField].include?(field.class) } -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
case <%= node.type %>: {
yp_<%= node.human %>_t *cast = (yp_<%= node.human %>_t *) node;
pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when YARP::NodeField, YARP::OptionalNodeField -%>
yp_node_stack_push(&node_stack, (yp_node_t *) cast-><%= field.name %>);
<%- when YARP::NodeListField -%>
<%- when Prism::NodeField, Prism::OptionalNodeField -%>
pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>);
<%- when Prism::NodeListField -%>
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
yp_node_stack_push(&node_stack, (yp_node_t *) cast-><%= field.name %>.nodes[index]);
pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
}
<%- end -%>
<%- end -%>
@ -129,71 +129,71 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
}
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
} else {
yp_node_t *node = yp_node_stack_pop(&node_stack);
pm_node_t *node = pm_node_stack_pop(&node_stack);
switch (YP_NODE_TYPE(node)) {
switch (PM_NODE_TYPE(node)) {
<%- nodes.each do |node| -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
case <%= node.type %>: {
<%- if node.fields.any? { |field| ![YARP::NodeField, YARP::OptionalNodeField, YARP::FlagsField].include?(field.class) } -%>
yp_<%= node.human %>_t *cast = (yp_<%= node.human %>_t *) node;
<%- if node.fields.any? { |field| ![Prism::NodeField, Prism::OptionalNodeField, Prism::FlagsField].include?(field.class) } -%>
pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
<%- end -%>
VALUE argv[<%= node.fields.length + 1 %>];
<%- node.fields.each_with_index do |field, index| -%>
// <%= field.name %>
<%- case field -%>
<%- when YARP::NodeField, YARP::OptionalNodeField -%>
<%- when Prism::NodeField, Prism::OptionalNodeField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_pop(value_stack);
<%- when YARP::NodeListField -%>
<%- when Prism::NodeListField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
rb_ary_push(argv[<%= index %>], rb_ary_pop(value_stack));
}
<%- when YARP::StringField -%>
<%- when Prism::StringField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = yp_string_new(&cast-><%= field.name %>, encoding);
<%- when YARP::ConstantField -%>
argv[<%= index %>] = pm_string_new(&cast-><%= field.name %>, encoding);
<%- when Prism::ConstantField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
assert(cast-><%= field.name %> != 0);
argv[<%= index %>] = rb_id2sym(constants[cast-><%= field.name %> - 1]);
<%- when YARP::OptionalConstantField -%>
<%- when Prism::OptionalConstantField -%>
argv[<%= index %>] = cast-><%= field.name %> == 0 ? Qnil : rb_id2sym(constants[cast-><%= field.name %> - 1]);
<%- when YARP::ConstantListField -%>
<%- when Prism::ConstantListField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
assert(cast-><%= field.name %>.ids[index] != 0);
rb_ary_push(argv[<%= index %>], rb_id2sym(constants[cast-><%= field.name %>.ids[index] - 1]));
}
<%- when YARP::LocationField -%>
<%- when Prism::LocationField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = yp_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
<%- when YARP::OptionalLocationField -%>
argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
<%- when Prism::OptionalLocationField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : yp_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
<%- when YARP::UInt32Field -%>
argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
<%- when Prism::UInt32Field -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = ULONG2NUM(cast-><%= field.name %>);
<%- when YARP::FlagsField -%>
<%- when Prism::FlagsField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = ULONG2NUM(node->flags >> <%= YARP::COMMON_FLAGS %>);
argv[<%= index %>] = ULONG2NUM(node->flags >> <%= Prism::COMMON_FLAGS %>);
<%- else -%>
<%- raise -%>
<%- end -%>
<%- end -%>
// location
argv[<%= node.fields.length %>] = yp_location_new(parser, node->location.start, node->location.end, source);
argv[<%= node.fields.length %>] = pm_location_new(parser, node->location.start, node->location.end, source);
rb_ary_push(value_stack, rb_class_new_instance(<%= node.fields.length + 1 %>, argv, rb_cYARP<%= node.name %>));
rb_ary_push(value_stack, rb_class_new_instance(<%= node.fields.length + 1 %>, argv, rb_cPrism<%= node.name %>));
break;
}
<%- end -%>
default:
rb_raise(rb_eRuntimeError, "unknown node type: %d", YP_NODE_TYPE(node));
rb_raise(rb_eRuntimeError, "unknown node type: %d", PM_NODE_TYPE(node));
}
}
}
@ -204,8 +204,8 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
}
void
Init_yarp_api_node(void) {
Init_prism_api_node(void) {
<%- nodes.each do |node| -%>
rb_cYARP<%= node.name %> = rb_define_class_under(rb_cYARP, "<%= node.name %>", rb_cYARPNode);
rb_cPrism<%= node.name %> = rb_define_class_under(rb_cPrism, "<%= node.name %>", rb_cPrismNode);
<%- end -%>
}

Просмотреть файл

@ -1,116 +1,116 @@
#ifndef YARP_AST_H
#define YARP_AST_H
#ifndef PRISM_AST_H
#define PRISM_AST_H
#include "yarp/defines.h"
#include "yarp/util/yp_constant_pool.h"
#include "yarp/util/yp_string.h"
#include "prism/defines.h"
#include "prism/util/pm_constant_pool.h"
#include "prism/util/pm_string.h"
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
// This enum represents every type of token in the Ruby source.
typedef enum yp_token_type {
typedef enum pm_token_type {
<%- tokens.each do |token| -%>
<%= token.declaration %>
<%- end -%>
YP_TOKEN_MAXIMUM, // the maximum token value
} yp_token_type_t;
PM_TOKEN_MAXIMUM, // the maximum token value
} pm_token_type_t;
// This struct represents a token in the Ruby source. We use it to track both
// type and location information.
typedef struct {
yp_token_type_t type;
pm_token_type_t type;
const uint8_t *start;
const uint8_t *end;
} yp_token_t;
} pm_token_t;
// This represents a range of bytes in the source string to which a node or
// token corresponds.
typedef struct {
const uint8_t *start;
const uint8_t *end;
} yp_location_t;
} pm_location_t;
struct yp_node;
struct pm_node;
typedef struct yp_node_list {
struct yp_node **nodes;
typedef struct pm_node_list {
struct pm_node **nodes;
size_t size;
size_t capacity;
} yp_node_list_t;
} pm_node_list_t;
enum yp_node_type {
enum pm_node_type {
<%- nodes.each_with_index do |node, index| -%>
<%= node.type %> = <%= index + 1 %>,
<%- end -%>
YP_SCOPE_NODE
PM_SCOPE_NODE
};
typedef uint16_t yp_node_type_t;
typedef uint16_t yp_node_flags_t;
typedef uint16_t pm_node_type_t;
typedef uint16_t pm_node_flags_t;
// We store the flags enum in every node in the tree. Some flags are common to
// all nodes (the ones listed below). Others are specific to certain node types.
static const yp_node_flags_t YP_NODE_FLAG_NEWLINE = 0x1;
static const yp_node_flags_t YP_NODE_FLAG_STATIC_LITERAL = 0x2;
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = 0x1;
static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = 0x2;
// For easy access, we define some macros to check node type
#define YP_NODE_TYPE(node) ((enum yp_node_type)node->type)
#define YP_NODE_TYPE_P(node, type) (YP_NODE_TYPE(node) == (type))
#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
#define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type))
// This is the overall tagged union representing a node in the syntax tree.
typedef struct yp_node {
typedef struct pm_node {
// This represents the type of the node. It somewhat maps to the nodes that
// existed in the original grammar and ripper, but it's not a 1:1 mapping.
yp_node_type_t type;
pm_node_type_t type;
// This represents any flags on the node
yp_node_flags_t flags;
pm_node_flags_t flags;
// This is the location of the node in the source. It's a range of bytes
// containing a start and an end.
yp_location_t location;
} yp_node_t;
pm_location_t location;
} pm_node_t;
<%- nodes.each do |node| -%>
// <%= node.name %>
//
// Type: <%= node.type %>
<%- if (node_flags = node.fields.find { |field| field.is_a? YARP::FlagsField }) -%>
<%- if (node_flags = node.fields.find { |field| field.is_a? Prism::FlagsField }) -%>
// Flags:
<%- found = flags.find { |flag| flag.name == node_flags.kind }.tap { |found| raise "Expected to find #{node_flags.kind}" unless found } -%>
<%- found.values.each do |value| -%>
// YP_<%= found.human.upcase %>_<%= value.name %>
// PM_<%= found.human.upcase %>_<%= value.name %>
<%- end -%>
<%- end -%>
typedef struct yp_<%= node.human %> {
yp_node_t base;
<%- node.fields.grep_v(YARP::FlagsField).each do |field| -%>
typedef struct pm_<%= node.human %> {
pm_node_t base;
<%- node.fields.grep_v(Prism::FlagsField).each do |field| -%>
<%= case field
when YARP::NodeField, YARP::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
when YARP::NodeListField then "struct yp_node_list #{field.name}"
when YARP::ConstantField, YARP::OptionalConstantField then "yp_constant_id_t #{field.name}"
when YARP::ConstantListField then "yp_constant_id_list_t #{field.name}"
when YARP::StringField then "yp_string_t #{field.name}"
when YARP::LocationField, YARP::OptionalLocationField then "yp_location_t #{field.name}"
when YARP::UInt32Field then "uint32_t #{field.name}"
when Prism::NodeField, Prism::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
when Prism::NodeListField then "struct pm_node_list #{field.name}"
when Prism::ConstantField, Prism::OptionalConstantField then "pm_constant_id_t #{field.name}"
when Prism::ConstantListField then "pm_constant_id_list_t #{field.name}"
when Prism::StringField then "pm_string_t #{field.name}"
when Prism::LocationField, Prism::OptionalLocationField then "pm_location_t #{field.name}"
when Prism::UInt32Field then "uint32_t #{field.name}"
else raise field.class.name
end
%>;
<%- end -%>
} yp_<%= node.human %>_t;
} pm_<%= node.human %>_t;
<%- end -%>
<%- flags.each do |flag| -%>
// <%= flag.name %>
typedef enum {
<%- flag.values.each.with_index(YARP::COMMON_FLAGS) do |value, index| -%>
YP_<%= flag.human.upcase %>_<%= value.name %> = 1 << <%= index %>,
<%- flag.values.each.with_index(Prism::COMMON_FLAGS) do |value, index| -%>
PM_<%= flag.human.upcase %>_<%= value.name %> = 1 << <%= index %>,
<%- end -%>
} yp_<%= flag.human %>_t;
} pm_<%= flag.human %>_t;
<%- end -%>
#define YP_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= YARP::SERIALIZE_ONLY_SEMANTICS_FIELDS %>
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS %>
#endif // YARP_AST_H
#endif // PRISM_AST_H

Просмотреть файл

@ -1,4 +1,4 @@
module YARP
module Prism
# A compiler is a visitor that returns the value of each node as it visits.
# This is as opposed to a visitor which will only walk the tree. This can be
# useful when you are trying to compile a tree into a different format.
@ -6,14 +6,14 @@ module YARP
# For example, to build a representation of the tree as s-expressions, you
# could write:
#
# class SExpressions < YARP::Compiler
# class SExpressions < Prism::Compiler
# def visit_arguments_node(node) = [:arguments, super]
# def visit_call_node(node) = [:call, super]
# def visit_integer_node(node) = [:integer]
# def visit_program_node(node) = [:program, super]
# end
#
# YARP.parse("1 + 2").value.accept(SExpressions.new)
# Prism.parse("1 + 2").value.accept(SExpressions.new)
# # => [:program, [[[:call, [[:integer], [:arguments, [[:integer]]]]]]]]
#
class Compiler

Просмотреть файл

@ -1,4 +1,4 @@
module YARP
module Prism
# The dispatcher class fires events for nodes that are found while walking an
# AST to all registered listeners. It's useful for performing different types
# of analysis on the AST while only having to walk the tree once.
@ -19,7 +19,7 @@ module YARP
#
# Then, you can walk any number of trees and dispatch events to the listeners:
#
# result = YARP.parse("001 + 002 + 003")
# result = Prism.parse("001 + 002 + 003")
# dispatcher.dispatch(result.value)
#
# Optionally, you can also use `#dispatch_once` to dispatch enter and leave

Просмотреть файл

@ -1,26 +1,26 @@
module YARP
# The DSL module provides a set of methods that can be used to create YARP
module Prism
# The DSL module provides a set of methods that can be used to create prism
# nodes in a more concise manner. For example, instead of writing:
#
# source = YARP::Source.new("[1]")
# source = Prism::Source.new("[1]")
#
# YARP::ArrayNode.new(
# Prism::ArrayNode.new(
# [
# YARP::IntegerNode.new(
# YARP::IntegerBaseFlags::DECIMAL,
# YARP::Location.new(source, 1, 1),
# Prism::IntegerNode.new(
# Prism::IntegerBaseFlags::DECIMAL,
# Prism::Location.new(source, 1, 1),
# )
# ],
# YARP::Location.new(source, 0, 1),
# YARP::Location.new(source, 2, 1)
# Prism::Location.new(source, 0, 1),
# Prism::Location.new(source, 2, 1)
# )
#
# you could instead write:
#
# source = YARP::Source.new("[1]")
# source = Prism::Source.new("[1]")
#
# ArrayNode(
# IntegerNode(YARP::IntegerBaseFlags::DECIMAL, Location(source, 1, 1))),
# [IntegerNode(Prism::IntegerBaseFlags::DECIMAL, Location(source, 1, 1))],
# Location(source, 0, 1),
# Location(source, 2, 1)
# )

Просмотреть файл

@ -1,4 +1,4 @@
module YARP
module Prism
# This visitor walks through the tree and copies each node as it is being
# visited. This is useful for consumers that want to mutate the tree, as you
# can change subtrees in place without affecting the rest of the tree.
@ -7,9 +7,9 @@ module YARP
<%= "\n" if index != 0 -%>
# Copy a <%= node.name %> node
def visit_<%= node.human %>(node)
<%- fields = node.fields.select { |field| [YARP::NodeField, YARP::OptionalNodeField, YARP::NodeListField].include?(field.class) } -%>
<%- fields = node.fields.select { |field| [Prism::NodeField, Prism::OptionalNodeField, Prism::NodeListField].include?(field.class) } -%>
<%- if fields.any? -%>
node.copy(<%= fields.map { |field| "#{field.name}: #{field.is_a?(YARP::NodeListField) ? "visit_all" : "visit"}(node.#{field.name})" }.join(", ") %>)
node.copy(<%= fields.map { |field| "#{field.name}: #{field.is_a?(Prism::NodeListField) ? "visit_all" : "visit"}(node.#{field.name})" }.join(", ") %>)
<%- else -%>
node.copy
<%- end -%>

Просмотреть файл

@ -1,4 +1,4 @@
module YARP
module Prism
# This represents a node in the tree. It is the parent class of all of the
# various node types.
class Node
@ -35,7 +35,7 @@ module YARP
<%= "#{node.comment.split("\n").map { |line| line.empty? ? "#" : "# #{line}" }.join("\n ")}\n " if node.comment %>class <%= node.name -%> < Node
<%- node.fields.each do |field| -%>
# attr_reader <%= field.name %>: <%= field.rbs_class %>
<%= "private " if field.is_a?(YARP::FlagsField) %>attr_reader :<%= field.name %>
<%= "private " if field.is_a?(Prism::FlagsField) %>attr_reader :<%= field.name %>
<%- end -%>
# def initialize: (<%= (node.fields.map { |field| "#{field.name}: #{field.rbs_class}" } + ["location: Location"]).join(", ") %>) -> void
@ -60,9 +60,9 @@ module YARP
def set_newline_flag(newline_marked)
<%- field = node.fields.find { |f| f.name == node.newline } or raise node.newline -%>
<%- case field -%>
<%- when YARP::NodeField, YARP::OptionalNodeField -%>
<%- when Prism::NodeField, Prism::OptionalNodeField -%>
<%= field.name %>.set_newline_flag(newline_marked)
<%- when YARP::NodeListField -%>
<%- when Prism::NodeListField -%>
first = <%= field.name %>.first
first.set_newline_flag(newline_marked) if first
<%- else raise field.class.name -%>
@ -74,23 +74,23 @@ module YARP
def child_nodes
[<%= node.fields.map { |field|
case field
when YARP::NodeField, YARP::OptionalNodeField then field.name
when YARP::NodeListField then "*#{field.name}"
when Prism::NodeField, Prism::OptionalNodeField then field.name
when Prism::NodeListField then "*#{field.name}"
end
}.compact.join(", ") %>]
end
# def compact_child_nodes: () -> Array[Node]
def compact_child_nodes
<%- if node.fields.any? { |field| field.is_a?(YARP::OptionalNodeField) } -%>
<%- if node.fields.any? { |field| field.is_a?(Prism::OptionalNodeField) } -%>
compact = []
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when YARP::NodeField -%>
<%- when Prism::NodeField -%>
compact << <%= field.name %>
<%- when YARP::OptionalNodeField -%>
<%- when Prism::OptionalNodeField -%>
compact << <%= field.name %> if <%= field.name %>
<%- when YARP::NodeListField -%>
<%- when Prism::NodeListField -%>
compact.concat(<%= field.name %>)
<%- end -%>
<%- end -%>
@ -98,8 +98,8 @@ module YARP
<%- else -%>
[<%= node.fields.map { |field|
case field
when YARP::NodeField then field.name
when YARP::NodeListField then "*#{field.name}"
when Prism::NodeField then field.name
when Prism::NodeListField then "*#{field.name}"
end
}.compact.join(", ") %>]
<%- end -%>
@ -109,8 +109,8 @@ module YARP
def comment_targets
[<%= node.fields.map { |field|
case field
when YARP::NodeField, YARP::LocationField then field.name
when YARP::OptionalNodeField, YARP::NodeListField, YARP::OptionalLocationField then "*#{field.name}"
when Prism::NodeField, Prism::LocationField then field.name
when Prism::OptionalNodeField, Prism::NodeListField, Prism::OptionalLocationField then "*#{field.name}"
end
}.compact.join(", ") %>]
end
@ -133,7 +133,7 @@ module YARP
end
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when YARP::LocationField -%>
<%- when Prism::LocationField -%>
<%- raise unless field.name.end_with?("_loc") -%>
<%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
@ -141,7 +141,7 @@ module YARP
def <%= field.name.delete_suffix("_loc") %>
<%= field.name %>.slice
end
<%- when YARP::OptionalLocationField -%>
<%- when Prism::OptionalLocationField -%>
<%- raise unless field.name.end_with?("_loc") -%>
<%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
@ -149,7 +149,7 @@ module YARP
def <%= field.name.delete_suffix("_loc") %>
<%= field.name %>&.slice
end
<%- when YARP::FlagsField -%>
<%- when Prism::FlagsField -%>
<%- flags.find { |flag| flag.name == field.kind }.tap { |flag| raise "Expected to find #{field.kind}" unless flag }.values.each do |value| -%>
# def <%= value.name.downcase %>?: () -> bool
@ -165,27 +165,27 @@ module YARP
<%- node.fields.each_with_index do |field, index| -%>
<%- pointer, preadd = index == node.fields.length - 1 ? ["└── ", " "] : ["├── ", "│ "] -%>
<%- case field -%>
<%- when YARP::NodeListField -%>
<%- when Prism::NodeListField -%>
inspector << "<%= pointer %><%= field.name %>: #{inspector.list("#{inspector.prefix}<%= preadd %>", <%= field.name %>)}"
<%- when YARP::ConstantListField -%>
<%- when Prism::ConstantListField -%>
inspector << "<%= pointer %><%= field.name %>: #{<%= field.name %>.inspect}\n"
<%- when YARP::NodeField -%>
<%- when Prism::NodeField -%>
inspector << "<%= pointer %><%= field.name %>:\n"
inspector << inspector.child_node(<%= field.name %>, "<%= preadd %>")
<%- when YARP::OptionalNodeField -%>
<%- when Prism::OptionalNodeField -%>
if (<%= field.name %> = self.<%= field.name %>).nil?
inspector << "<%= pointer %><%= field.name %>: ∅\n"
else
inspector << "<%= pointer %><%= field.name %>:\n"
inspector << <%= field.name %>.inspect(inspector.child_inspector("<%= preadd %>")).delete_prefix(inspector.prefix)
end
<%- when YARP::ConstantField, YARP::OptionalConstantField, YARP::StringField, YARP::UInt32Field -%>
<%- when Prism::ConstantField, Prism::OptionalConstantField, Prism::StringField, Prism::UInt32Field -%>
inspector << "<%= pointer %><%= field.name %>: #{<%= field.name %>.inspect}\n"
<%- when YARP::FlagsField -%>
<%- when Prism::FlagsField -%>
<%- flag = flags.find { |flag| flag.name == field.kind }.tap { |flag| raise unless flag } -%>
flags = [<%= flag.values.map { |value| "(\"#{value.name.downcase}\" if #{value.name.downcase}?)" }.join(", ") %>].compact
inspector << "<%= pointer %><%= field.name %>: #{flags.empty? ? "∅" : flags.join(", ")}\n"
<%- when YARP::LocationField, YARP::OptionalLocationField -%>
<%- when Prism::LocationField, Prism::OptionalLocationField -%>
inspector << "<%= pointer %><%= field.name %>: #{inspector.location(<%= field.name %>)}\n"
<%- else -%>
<%- raise -%>

Просмотреть файл

@ -11,7 +11,7 @@ if String.instance_method(:unpack1).parameters.none? { |_, name| name == :offset
)
end
module YARP
module Prism
module Serialize
MAJOR_VERSION = 0
MINOR_VERSION = 12
@ -67,7 +67,7 @@ module YARP
length = load_varint
lex_state = load_varint
location = Location.new(@source, start, length)
tokens << [YARP::Token.new(type, location.slice, location), lex_state]
tokens << [Prism::Token.new(type, location.slice, location), lex_state]
end
tokens
@ -83,11 +83,11 @@ module YARP
end
raise "Expected to consume all bytes while deserializing" unless @io.eof?
YARP::ParseResult.new(tokens, comments, errors, warnings, @source)
Prism::ParseResult.new(tokens, comments, errors, warnings, @source)
end
def load_nodes
raise "Invalid serialization" if io.read(4) != "YARP"
raise "Invalid serialization" if io.read(5) != "PRISM"
raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
only_semantic_fields = io.read(1).unpack1("C")
unless only_semantic_fields == 0
@ -107,7 +107,7 @@ module YARP
def load_result
node, comments, errors, warnings = load_nodes
YARP::ParseResult.new(node, comments, errors, warnings, @source)
Prism::ParseResult.new(node, comments, errors, warnings, @source)
end
private
@ -204,16 +204,16 @@ module YARP
<%- end -%>
<%= node.name %>.new(<%= (node.fields.map { |field|
case field
when YARP::NodeField then "load_node"
when YARP::OptionalNodeField then "load_optional_node"
when YARP::StringField then "load_string"
when YARP::NodeListField then "Array.new(load_varint) { load_node }"
when YARP::ConstantField then "load_required_constant"
when YARP::OptionalConstantField then "load_optional_constant"
when YARP::ConstantListField then "Array.new(load_varint) { load_required_constant }"
when YARP::LocationField then "load_location"
when YARP::OptionalLocationField then "load_optional_location"
when YARP::UInt32Field, YARP::FlagsField then "load_varint"
when Prism::NodeField then "load_node"
when Prism::OptionalNodeField then "load_optional_node"
when Prism::StringField then "load_string"
when Prism::NodeListField then "Array.new(load_varint) { load_node }"
when Prism::ConstantField then "load_required_constant"
when Prism::OptionalConstantField then "load_optional_constant"
when Prism::ConstantListField then "Array.new(load_varint) { load_required_constant }"
when Prism::LocationField then "load_location"
when Prism::OptionalLocationField then "load_optional_location"
when Prism::UInt32Field, Prism::FlagsField then "load_varint"
else raise
end
} + ["location"]).join(", ") -%>)

Просмотреть файл

@ -1,4 +1,4 @@
module YARP
module Prism
# A class that knows how to walk down the tree. None of the individual visit
# methods are implemented on this visitor, so it forces the consumer to
# implement each one that they need. For a default implementation that
@ -25,7 +25,7 @@ module YARP
# For example, to find all of the method calls that call the `foo` method, you
# could write:
#
# class FooCalls < YARP::Visitor
# class FooCalls < Prism::Visitor
# def visit_call_node(node)
# if node.name == "foo"
# # Do something with the node

Просмотреть файл

@ -1,78 +1,78 @@
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
#include "yarp/node.h"
#include "prism/node.h"
// Clear the node but preserve its location.
void yp_node_clear(yp_node_t *node) {
yp_location_t location = node->location;
memset(node, 0, sizeof(yp_node_t));
void pm_node_clear(pm_node_t *node) {
pm_location_t location = node->location;
memset(node, 0, sizeof(pm_node_t));
node->location = location;
}
static void
yp_node_memsize_node(yp_node_t *node, yp_memsize_t *memsize);
pm_node_memsize_node(pm_node_t *node, pm_memsize_t *memsize);
// Calculate the size of the node list in bytes.
static size_t
yp_node_list_memsize(yp_node_list_t *node_list, yp_memsize_t *memsize) {
size_t size = sizeof(yp_node_list_t) + (node_list->capacity * sizeof(yp_node_t *));
pm_node_list_memsize(pm_node_list_t *node_list, pm_memsize_t *memsize) {
size_t size = sizeof(pm_node_list_t) + (node_list->capacity * sizeof(pm_node_t *));
for (size_t index = 0; index < node_list->size; index++) {
yp_node_memsize_node(node_list->nodes[index], memsize);
pm_node_memsize_node(node_list->nodes[index], memsize);
}
return size;
}
// Append a new node onto the end of the node list.
void
yp_node_list_append(yp_node_list_t *list, yp_node_t *node) {
pm_node_list_append(pm_node_list_t *list, pm_node_t *node) {
if (list->size == list->capacity) {
list->capacity = list->capacity == 0 ? 4 : list->capacity * 2;
list->nodes = (yp_node_t **) realloc(list->nodes, sizeof(yp_node_t *) * list->capacity);
list->nodes = (pm_node_t **) realloc(list->nodes, sizeof(pm_node_t *) * list->capacity);
}
list->nodes[list->size++] = node;
}
YP_EXPORTED_FUNCTION void
yp_node_destroy(yp_parser_t *parser, yp_node_t *node);
PRISM_EXPORTED_FUNCTION void
pm_node_destroy(pm_parser_t *parser, pm_node_t *node);
// Deallocate the inner memory of a list of nodes. The parser argument is not
// used, but is here for the future possibility of pre-allocating memory pools.
static void
yp_node_list_free(yp_parser_t *parser, yp_node_list_t *list) {
pm_node_list_free(pm_parser_t *parser, pm_node_list_t *list) {
if (list->capacity > 0) {
for (size_t index = 0; index < list->size; index++) {
yp_node_destroy(parser, list->nodes[index]);
pm_node_destroy(parser, list->nodes[index]);
}
free(list->nodes);
}
}
// Deallocate the space for a yp_node_t. Similarly to yp_node_alloc, we're not
// Deallocate the space for a pm_node_t. Similarly to pm_node_alloc, we're not
// using the parser argument, but it's there to allow for the future possibility
// of pre-allocating larger memory pools.
YP_EXPORTED_FUNCTION void
yp_node_destroy(yp_parser_t *parser, yp_node_t *node) {
switch (YP_NODE_TYPE(node)) {
PRISM_EXPORTED_FUNCTION void
pm_node_destroy(pm_parser_t *parser, pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
<%- nodes.each do |node| -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
case <%= node.type %>: {
<%- if node.fields.any? { |field| ![YARP::LocationField, YARP::OptionalLocationField, YARP::UInt32Field, YARP::FlagsField, YARP::ConstantField, YARP::OptionalConstantField].include?(field.class) } -%>
yp_<%= node.human %>_t *cast = (yp_<%= node.human %>_t *) node;
<%- if node.fields.any? { |field| ![Prism::LocationField, Prism::OptionalLocationField, Prism::UInt32Field, Prism::FlagsField, Prism::ConstantField, Prism::OptionalConstantField].include?(field.class) } -%>
pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
<%- end -%>
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when YARP::LocationField, YARP::OptionalLocationField, YARP::UInt32Field, YARP::FlagsField, YARP::ConstantField, YARP::OptionalConstantField -%>
<%- when YARP::NodeField -%>
yp_node_destroy(parser, (yp_node_t *)cast-><%= field.name %>);
<%- when YARP::OptionalNodeField -%>
<%- when Prism::LocationField, Prism::OptionalLocationField, Prism::UInt32Field, Prism::FlagsField, Prism::ConstantField, Prism::OptionalConstantField -%>
<%- when Prism::NodeField -%>
pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
<%- when Prism::OptionalNodeField -%>
if (cast-><%= field.name %> != NULL) {
yp_node_destroy(parser, (yp_node_t *)cast-><%= field.name %>);
pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
}
<%- when YARP::StringField -%>
yp_string_free(&cast-><%= field.name %>);
<%- when YARP::NodeListField -%>
yp_node_list_free(parser, &cast-><%= field.name %>);
<%- when YARP::ConstantListField -%>
yp_constant_id_list_free(&cast-><%= field.name %>);
<%- when Prism::StringField -%>
pm_string_free(&cast-><%= field.name %>);
<%- when Prism::NodeListField -%>
pm_node_list_free(parser, &cast-><%= field.name %>);
<%- when Prism::ConstantListField -%>
pm_constant_id_list_free(&cast-><%= field.name %>);
<%- else -%>
<%- raise -%>
<%- end -%>
@ -89,42 +89,42 @@ yp_node_destroy(yp_parser_t *parser, yp_node_t *node) {
}
static void
yp_node_memsize_node(yp_node_t *node, yp_memsize_t *memsize) {
pm_node_memsize_node(pm_node_t *node, pm_memsize_t *memsize) {
memsize->node_count++;
switch (YP_NODE_TYPE(node)) {
switch (PM_NODE_TYPE(node)) {
// We do not calculate memsize of a ScopeNode
// as it should never be generated
case YP_SCOPE_NODE:
case PM_SCOPE_NODE:
return;
<%- nodes.each do |node| -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
case <%= node.type %>: {
yp_<%= node.human %>_t *cast = (yp_<%= node.human %>_t *) node;
pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
memsize->memsize += sizeof(*cast);
<%- if node.fields.any? { |f| f.is_a?(YARP::NodeListField) } -%>
<%- if node.fields.any? { |f| f.is_a?(Prism::NodeListField) } -%>
// Node lists will add in their own sizes below.
memsize->memsize -= sizeof(yp_node_list_t) * <%= node.fields.count { |f| f.is_a?(YARP::NodeListField) } %>;
memsize->memsize -= sizeof(pm_node_list_t) * <%= node.fields.count { |f| f.is_a?(Prism::NodeListField) } %>;
<%- end -%>
<%- if node.fields.any? { |f| f.is_a?(YARP::ConstantListField) } -%>
<%- if node.fields.any? { |f| f.is_a?(Prism::ConstantListField) } -%>
// Constant id lists will add in their own sizes below.
memsize->memsize -= sizeof(yp_constant_id_list_t) * <%= node.fields.count { |f| f.is_a?(YARP::ConstantListField) } %>;
memsize->memsize -= sizeof(pm_constant_id_list_t) * <%= node.fields.count { |f| f.is_a?(Prism::ConstantListField) } %>;
<%- end -%>
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when YARP::ConstantField, YARP::OptionalConstantField, YARP::UInt32Field, YARP::FlagsField, YARP::LocationField, YARP::OptionalLocationField -%>
<%- when YARP::NodeField -%>
yp_node_memsize_node((yp_node_t *)cast-><%= field.name %>, memsize);
<%- when YARP::OptionalNodeField -%>
<%- when Prism::ConstantField, Prism::OptionalConstantField, Prism::UInt32Field, Prism::FlagsField, Prism::LocationField, Prism::OptionalLocationField -%>
<%- when Prism::NodeField -%>
pm_node_memsize_node((pm_node_t *)cast-><%= field.name %>, memsize);
<%- when Prism::OptionalNodeField -%>
if (cast-><%= field.name %> != NULL) {
yp_node_memsize_node((yp_node_t *)cast-><%= field.name %>, memsize);
pm_node_memsize_node((pm_node_t *)cast-><%= field.name %>, memsize);
}
<%- when YARP::StringField -%>
memsize->memsize += yp_string_memsize(&cast-><%= field.name %>);
<%- when YARP::NodeListField -%>
memsize->memsize += yp_node_list_memsize(&cast-><%= field.name %>, memsize);
<%- when YARP::ConstantListField -%>
memsize->memsize += yp_constant_id_list_memsize(&cast-><%= field.name %>);
<%- when Prism::StringField -%>
memsize->memsize += pm_string_memsize(&cast-><%= field.name %>);
<%- when Prism::NodeListField -%>
memsize->memsize += pm_node_list_memsize(&cast-><%= field.name %>, memsize);
<%- when Prism::ConstantListField -%>
memsize->memsize += pm_constant_id_list_memsize(&cast-><%= field.name %>);
<%- else -%>
<%- raise -%>
<%- end -%>
@ -137,15 +137,15 @@ yp_node_memsize_node(yp_node_t *node, yp_memsize_t *memsize) {
}
// Calculates the memory footprint of a given node.
//
// Resets *memsize to zero (both the byte total and the node count) and then
// hands it to the recursive *_node_memsize_node helper, which accumulates the
// totals for the node it is given.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; each yp_/YP_ line looks like the removed form of the pm_/PRISM_
// line that follows it. Confirm against the real file before editing.
YP_EXPORTED_FUNCTION void
yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize) {
*memsize = (yp_memsize_t) { .memsize = 0, .node_count = 0 };
yp_node_memsize_node(node, memsize);
PRISM_EXPORTED_FUNCTION void
pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize) {
*memsize = (pm_memsize_t) { .memsize = 0, .node_count = 0 };
pm_node_memsize_node(node, memsize);
}
// Returns a string representation of the given node type.
YP_EXPORTED_FUNCTION const char *
yp_node_type_to_str(yp_node_type_t node_type)
PRISM_EXPORTED_FUNCTION const char *
pm_node_type_to_str(pm_node_type_t node_type)
{
switch (node_type) {
<%- nodes.each do |node| -%>

Просмотреть файл

@ -1,92 +1,92 @@
#include "yarp/defines.h"
#include "prism/defines.h"
#include <stdio.h>
#include "yarp/ast.h"
#include "yarp/parser.h"
#include "yarp/util/yp_buffer.h"
#include "prism/ast.h"
#include "prism/parser.h"
#include "prism/util/pm_buffer.h"
// Appends a location to the buffer in the form "[SSSS-EEEE]", where both
// numbers are byte offsets of location->start and location->end measured from
// parser->start, zero-padded to at least four digits.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ lines look like the removed forms of the pm_ lines.
static void
prettyprint_location(yp_buffer_t *buffer, yp_parser_t *parser, yp_location_t *location) {
prettyprint_location(pm_buffer_t *buffer, pm_parser_t *parser, pm_location_t *location) {
// The initializer only exists to size the scratch array: 11 characters plus
// the terminating NUL.
char printed[] = "[0000-0000]";
// snprintf is bounded by sizeof(printed), so it cannot overflow the array, but
// an offset above 9999 needs more than four digits and the output is then
// silently truncated (closing bracket and trailing digits are lost).
snprintf(printed, sizeof(printed), "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
yp_buffer_append_str(buffer, printed, strlen(printed));
pm_buffer_append_str(buffer, printed, strlen(printed));
}
static void
prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
switch (YP_NODE_TYPE(node)) {
prettyprint_node(pm_buffer_t *buffer, pm_parser_t *parser, pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
// We do not need to print a ScopeNode as it's not part
// of the AST
case YP_SCOPE_NODE:
case PM_SCOPE_NODE:
return;
<%- nodes.each do |node| -%>
case <%= node.type %>: {
yp_buffer_append_str(buffer, "<%= node.name %>(", <%= node.name.length + 1 %>);
pm_buffer_append_str(buffer, "<%= node.name %>(", <%= node.name.length + 1 %>);
<%- node.fields.each_with_index do |field, index| -%>
<%= "yp_buffer_append_str(buffer, \", \", 2);" if index != 0 -%>
<%= "pm_buffer_append_str(buffer, \", \", 2);" if index != 0 -%>
<%- case field -%>
<%- when YARP::NodeField -%>
prettyprint_node(buffer, parser, (yp_node_t *)((yp_<%= node.human %>_t *)node)-><%= field.name %>);
<%- when YARP::OptionalNodeField -%>
if (((yp_<%= node.human %>_t *)node)-><%= field.name %> == NULL) {
yp_buffer_append_str(buffer, "nil", 3);
<%- when Prism::NodeField -%>
prettyprint_node(buffer, parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>);
<%- when Prism::OptionalNodeField -%>
if (((pm_<%= node.human %>_t *)node)-><%= field.name %> == NULL) {
pm_buffer_append_str(buffer, "nil", 3);
} else {
prettyprint_node(buffer, parser, (yp_node_t *)((yp_<%= node.human %>_t *)node)-><%= field.name %>);
prettyprint_node(buffer, parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>);
}
<%- when YARP::StringField -%>
yp_buffer_append_str(buffer, "\"", 1);
yp_buffer_append_bytes(buffer, yp_string_source(&((yp_<%= node.human %>_t *)node)-><%= field.name %>), yp_string_length(&((yp_<%= node.human %>_t *)node)-><%= field.name %>));
yp_buffer_append_str(buffer, "\"", 1);
<%- when YARP::NodeListField -%>
yp_buffer_append_str(buffer, "[", 1);
for (uint32_t index = 0; index < ((yp_<%= node.human %>_t *)node)-><%= field.name %>.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
prettyprint_node(buffer, parser, (yp_node_t *) ((yp_<%= node.human %>_t *) node)-><%= field.name %>.nodes[index]);
<%- when Prism::StringField -%>
pm_buffer_append_str(buffer, "\"", 1);
pm_buffer_append_bytes(buffer, pm_string_source(&((pm_<%= node.human %>_t *)node)-><%= field.name %>), pm_string_length(&((pm_<%= node.human %>_t *)node)-><%= field.name %>));
pm_buffer_append_str(buffer, "\"", 1);
<%- when Prism::NodeListField -%>
pm_buffer_append_str(buffer, "[", 1);
for (uint32_t index = 0; index < ((pm_<%= node.human %>_t *)node)-><%= field.name %>.size; index++) {
if (index != 0) pm_buffer_append_str(buffer, ", ", 2);
prettyprint_node(buffer, parser, (pm_node_t *) ((pm_<%= node.human %>_t *) node)-><%= field.name %>.nodes[index]);
}
yp_buffer_append_str(buffer, "]", 1);
<%- when YARP::ConstantField -%>
pm_buffer_append_str(buffer, "]", 1);
<%- when Prism::ConstantField -%>
char <%= field.name %>_buffer[12];
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "%u", ((yp_<%= node.human %>_t *)node)-><%= field.name %>);
yp_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
<%- when YARP::OptionalConstantField -%>
if (((yp_<%= node.human %>_t *)node)-><%= field.name %> == 0) {
yp_buffer_append_str(buffer, "nil", 3);
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "%u", ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
pm_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
<%- when Prism::OptionalConstantField -%>
if (((pm_<%= node.human %>_t *)node)-><%= field.name %> == 0) {
pm_buffer_append_str(buffer, "nil", 3);
} else {
char <%= field.name %>_buffer[12];
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "%u", ((yp_<%= node.human %>_t *)node)-><%= field.name %>);
yp_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "%u", ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
pm_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
}
<%- when YARP::ConstantListField -%>
yp_buffer_append_str(buffer, "[", 1);
for (uint32_t index = 0; index < ((yp_<%= node.human %>_t *)node)-><%= field.name %>.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
<%- when Prism::ConstantListField -%>
pm_buffer_append_str(buffer, "[", 1);
for (uint32_t index = 0; index < ((pm_<%= node.human %>_t *)node)-><%= field.name %>.size; index++) {
if (index != 0) pm_buffer_append_str(buffer, ", ", 2);
char <%= field.name %>_buffer[12];
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "%u", ((yp_<%= node.human %>_t *)node)-><%= field.name %>.ids[index]);
yp_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "%u", ((pm_<%= node.human %>_t *)node)-><%= field.name %>.ids[index]);
pm_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
}
yp_buffer_append_str(buffer, "]", 1);
<%- when YARP::LocationField -%>
prettyprint_location(buffer, parser, &((yp_<%= node.human %>_t *)node)-><%= field.name %>);
<%- when YARP::OptionalLocationField -%>
if (((yp_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) {
yp_buffer_append_str(buffer, "nil", 3);
pm_buffer_append_str(buffer, "]", 1);
<%- when Prism::LocationField -%>
prettyprint_location(buffer, parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>);
<%- when Prism::OptionalLocationField -%>
if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) {
pm_buffer_append_str(buffer, "nil", 3);
} else {
prettyprint_location(buffer, parser, &((yp_<%= node.human %>_t *)node)-><%= field.name %>);
prettyprint_location(buffer, parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>);
}
<%- when YARP::UInt32Field -%>
<%- when Prism::UInt32Field -%>
char <%= field.name %>_buffer[12];
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "+%d", ((yp_<%= node.human %>_t *)node)-><%= field.name %>);
yp_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
<%- when YARP::FlagsField -%>
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "+%d", ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
pm_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
<%- when Prism::FlagsField -%>
char <%= field.name %>_buffer[12];
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "+%d", node->flags >> <%= YARP::COMMON_FLAGS %>);
yp_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
snprintf(<%= field.name %>_buffer, sizeof(<%= field.name %>_buffer), "+%d", node->flags >> <%= Prism::COMMON_FLAGS %>);
pm_buffer_append_str(buffer, <%= field.name %>_buffer, strlen(<%= field.name %>_buffer));
<%- else -%>
<%- raise -%>
<%- end -%>
<%- end -%>
yp_buffer_append_str(buffer, ")", 1);
pm_buffer_append_str(buffer, ")", 1);
break;
}
<%- end -%>
@ -94,18 +94,18 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
}
// Pretty-prints the given node to stdout followed by a newline.
//
// The text is first rendered into a temporary buffer by prettyprint_node and
// the buffer is freed before returning. If the buffer cannot be initialized
// the function returns without printing anything.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ lines look like the removed forms of the pm_ lines.
void
yp_print_node(yp_parser_t *parser, yp_node_t *node) {
yp_buffer_t buffer;
if (!yp_buffer_init(&buffer)) return;
pm_print_node(pm_parser_t *parser, pm_node_t *node) {
pm_buffer_t buffer;
if (!pm_buffer_init(&buffer)) return;
prettyprint_node(&buffer, parser, node);
// "%.*s" prints exactly buffer.length bytes, so the buffer contents do not
// have to be NUL-terminated.
printf("%.*s\n", (int) buffer.length, buffer.value);
yp_buffer_free(&buffer);
pm_buffer_free(&buffer);
}
// Pretty-prints the AST represented by the given node to the given buffer.
//
// Exported wrapper around the file-local prettyprint_node; it only reorders
// the arguments (the helper takes the buffer first). The caller supplies the
// buffer; this function neither initializes nor frees it.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the YP_/yp_ lines look like the removed forms of the PRISM_/pm_
// lines.
YP_EXPORTED_FUNCTION void
yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
PRISM_EXPORTED_FUNCTION void
pm_prettyprint(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
prettyprint_node(buffer, parser, node);
}

Просмотреть файл

@ -1,64 +1,64 @@
#include "yarp.h"
#include "prism.h"
#include <stdio.h>
// Narrows a pointer difference to uint32_t.
//
// The assertion requires the value to be non-negative and strictly less than
// UINT32_MAX. It is the only range check: when assertions are compiled out
// (standard NDEBUG behaviour) the cast is performed unchecked.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ line looks like the removed form of the pm_ line.
static inline uint32_t
yp_ptrdifft_to_u32(ptrdiff_t value) {
pm_ptrdifft_to_u32(ptrdiff_t value) {
assert(value >= 0 && ((unsigned long) value) < UINT32_MAX);
return (uint32_t) value;
}
// Narrows a size_t to uint32_t.
//
// The assertion requires the value to be strictly less than UINT32_MAX. It is
// the only range check: when assertions are compiled out (standard NDEBUG
// behaviour) the cast is performed unchecked.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ line looks like the removed form of the pm_ line.
static inline uint32_t
yp_sizet_to_u32(size_t value) {
pm_sizet_to_u32(size_t value) {
assert(value < UINT32_MAX);
return (uint32_t) value;
}
// Serializes a location as two u32 values appended to the buffer:
//   1. the offset of location->start from parser->start
//   2. the length of the location (location->end - location->start)
//
// Asserts that both pointers are non-NULL and that start does not come after
// end.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ lines look like the removed forms of the pm_ lines.
static void
yp_serialize_location(yp_parser_t *parser, yp_location_t *location, yp_buffer_t *buffer) {
pm_serialize_location(pm_parser_t *parser, pm_location_t *location, pm_buffer_t *buffer) {
assert(location->start);
assert(location->end);
assert(location->start <= location->end);
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(location->start - parser->start));
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(location->end - location->start));
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(location->start - parser->start));
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(location->end - location->start));
}
// Serializes a string, choosing the encoding from string->type:
//   - shared:           u8 tag 1, u32 offset of the string source from
//                       parser->start, u32 length
//   - owned / constant: u8 tag 2, u32 length, then the raw bytes
//   - mapped:           not supported; trips an assertion and writes nothing
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; each yp_/YP_ run looks like the removed form of the pm_/PM_ run
// that follows it, which is why case labels and statements appear twice.
static void
yp_serialize_string(yp_parser_t *parser, yp_string_t *string, yp_buffer_t *buffer) {
pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffer) {
switch (string->type) {
case YP_STRING_SHARED: {
yp_buffer_append_u8(buffer, 1);
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(yp_string_source(string) - parser->start));
yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_string_length(string)));
case PM_STRING_SHARED: {
pm_buffer_append_u8(buffer, 1);
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(pm_string_source(string) - parser->start));
pm_buffer_append_u32(buffer, pm_sizet_to_u32(pm_string_length(string)));
break;
}
case YP_STRING_OWNED:
case YP_STRING_CONSTANT: {
uint32_t length = yp_sizet_to_u32(yp_string_length(string));
yp_buffer_append_u8(buffer, 2);
yp_buffer_append_u32(buffer, length);
yp_buffer_append_bytes(buffer, yp_string_source(string), length);
case PM_STRING_OWNED:
case PM_STRING_CONSTANT: {
uint32_t length = pm_sizet_to_u32(pm_string_length(string));
pm_buffer_append_u8(buffer, 2);
pm_buffer_append_u32(buffer, length);
pm_buffer_append_bytes(buffer, pm_string_source(string), length);
break;
}
case YP_STRING_MAPPED:
case PM_STRING_MAPPED:
assert(false && "Cannot serialize mapped strings.");
break;
}
}
void
yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
yp_buffer_append_u8(buffer, (uint8_t) YP_NODE_TYPE(node));
pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_buffer_append_u8(buffer, (uint8_t) PM_NODE_TYPE(node));
size_t offset = buffer->length;
yp_serialize_location(parser, &node->location, buffer);
pm_serialize_location(parser, &node->location, buffer);
switch (YP_NODE_TYPE(node)) {
switch (PM_NODE_TYPE(node)) {
// We do not need to serialize a ScopeNode ever as
// it is not part of the AST
case YP_SCOPE_NODE:
case PM_SCOPE_NODE:
return;
<%- nodes.each do |node| -%>
case <%= node.type %>: {
@ -66,58 +66,58 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
// serialize length
// encoding of location u32s make us need to save this offset.
size_t length_offset = buffer->length;
yp_buffer_append_str(buffer, "\0\0\0\0", 4); /* consume 4 bytes, updated below */
pm_buffer_append_str(buffer, "\0\0\0\0", 4); /* consume 4 bytes, updated below */
<%- end -%>
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when YARP::NodeField -%>
yp_serialize_node(parser, (yp_node_t *)((yp_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
<%- when YARP::OptionalNodeField -%>
if (((yp_<%= node.human %>_t *)node)-><%= field.name %> == NULL) {
yp_buffer_append_u8(buffer, 0);
<%- when Prism::NodeField -%>
pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
<%- when Prism::OptionalNodeField -%>
if (((pm_<%= node.human %>_t *)node)-><%= field.name %> == NULL) {
pm_buffer_append_u8(buffer, 0);
} else {
yp_serialize_node(parser, (yp_node_t *)((yp_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
}
<%- when YARP::StringField -%>
yp_serialize_string(parser, &((yp_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
<%- when YARP::NodeListField -%>
uint32_t <%= field.name %>_size = yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= field.name %>.size);
yp_buffer_append_u32(buffer, <%= field.name %>_size);
<%- when Prism::StringField -%>
pm_serialize_string(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
<%- when Prism::NodeListField -%>
uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
pm_buffer_append_u32(buffer, <%= field.name %>_size);
for (uint32_t index = 0; index < <%= field.name %>_size; index++) {
yp_serialize_node(parser, (yp_node_t *) ((yp_<%= node.human %>_t *)node)-><%= field.name %>.nodes[index], buffer);
pm_serialize_node(parser, (pm_node_t *) ((pm_<%= node.human %>_t *)node)-><%= field.name %>.nodes[index], buffer);
}
<%- when YARP::ConstantField, YARP::OptionalConstantField -%>
yp_buffer_append_u32(buffer, yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= field.name %>));
<%- when YARP::ConstantListField -%>
uint32_t <%= field.name %>_size = yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= field.name %>.size);
yp_buffer_append_u32(buffer, <%= field.name %>_size);
<%- when Prism::ConstantField, Prism::OptionalConstantField -%>
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>));
<%- when Prism::ConstantListField -%>
uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
pm_buffer_append_u32(buffer, <%= field.name %>_size);
for (uint32_t index = 0; index < <%= field.name %>_size; index++) {
yp_buffer_append_u32(buffer, yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= field.name %>.ids[index]));
pm_buffer_append_u32(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.ids[index]));
}
<%- when YARP::LocationField -%>
<%- when Prism::LocationField -%>
<%- if field.should_be_serialized? -%>
yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
<%- end -%>
<%- when YARP::OptionalLocationField -%>
<%- when Prism::OptionalLocationField -%>
<%- if field.should_be_serialized? -%>
if (((yp_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) {
yp_buffer_append_u8(buffer, 0);
if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) {
pm_buffer_append_u8(buffer, 0);
} else {
yp_buffer_append_u8(buffer, 1);
yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
pm_buffer_append_u8(buffer, 1);
pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
}
<%- end -%>
<%- when YARP::UInt32Field -%>
yp_buffer_append_u32(buffer, ((yp_<%= node.human %>_t *)node)-><%= field.name %>);
<%- when YARP::FlagsField -%>
yp_buffer_append_u32(buffer, node->flags >> <%= YARP::COMMON_FLAGS %>);
<%- when Prism::UInt32Field -%>
pm_buffer_append_u32(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
<%- when Prism::FlagsField -%>
pm_buffer_append_u32(buffer, node->flags >> <%= Prism::COMMON_FLAGS %>);
<%- else -%>
<%- raise -%>
<%- end -%>
<%- end -%>
<%- if node.needs_serialized_length? -%>
// serialize length
uint32_t length = yp_sizet_to_u32(buffer->length - offset - sizeof(uint32_t));
uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t));
memcpy(buffer->value + length_offset, &length, sizeof(uint32_t));
<%- end -%>
break;
@ -127,83 +127,83 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
}
// Serializes a single comment as: u8 comment type, u32 offset of
// comment->start from parser->start, u32 length (comment->end -
// comment->start).
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ lines look like the removed forms of the pm_ lines.
static void
yp_serialize_comment(yp_parser_t *parser, yp_comment_t *comment, yp_buffer_t *buffer) {
pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
// serialize type
yp_buffer_append_u8(buffer, (uint8_t) comment->type);
pm_buffer_append_u8(buffer, (uint8_t) comment->type);
// serialize location
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(comment->start - parser->start));
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(comment->end - comment->start));
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(comment->start - parser->start));
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(comment->end - comment->start));
}
// Serializes a list of comments: a u32 element count followed by each comment
// in list order, walking from list->head through node.next until NULL.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ lines look like the removed forms of the pm_ lines.
static void
yp_serialize_comment_list(yp_parser_t *parser, yp_list_t *list, yp_buffer_t *buffer) {
yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(list)));
pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
pm_buffer_append_u32(buffer, pm_sizet_to_u32(pm_list_size(list)));
yp_comment_t *comment;
for (comment = (yp_comment_t *) list->head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
yp_serialize_comment(parser, comment, buffer);
pm_comment_t *comment;
for (comment = (pm_comment_t *) list->head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
pm_serialize_comment(parser, comment, buffer);
}
}
// Serializes a single diagnostic as: u32 message length, the message bytes
// (without a terminating NUL), u32 offset of diagnostic->start from
// parser->start, u32 length (diagnostic->end - diagnostic->start).
//
// The message length is taken with strlen, so diagnostic->message must be a
// NUL-terminated string.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ lines look like the removed forms of the pm_ lines.
static void
yp_serialize_diagnostic(yp_parser_t *parser, yp_diagnostic_t *diagnostic, yp_buffer_t *buffer) {
pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
// serialize message
size_t message_length = strlen(diagnostic->message);
yp_buffer_append_u32(buffer, yp_sizet_to_u32(message_length));
yp_buffer_append_str(buffer, diagnostic->message, message_length);
pm_buffer_append_u32(buffer, pm_sizet_to_u32(message_length));
pm_buffer_append_str(buffer, diagnostic->message, message_length);
// serialize location
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(diagnostic->start - parser->start));
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(diagnostic->end - diagnostic->start));
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(diagnostic->start - parser->start));
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(diagnostic->end - diagnostic->start));
}
// Serializes a list of diagnostics: a u32 element count followed by each
// diagnostic in list order, walking from list->head through node.next until
// NULL.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ lines look like the removed forms of the pm_ lines.
static void
yp_serialize_diagnostic_list(yp_parser_t *parser, yp_list_t *list, yp_buffer_t *buffer) {
yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(list)));
pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
pm_buffer_append_u32(buffer, pm_sizet_to_u32(pm_list_size(list)));
yp_diagnostic_t *diagnostic;
for (diagnostic = (yp_diagnostic_t *) list->head; diagnostic != NULL; diagnostic = (yp_diagnostic_t *) diagnostic->node.next) {
yp_serialize_diagnostic(parser, diagnostic, buffer);
pm_diagnostic_t *diagnostic;
for (diagnostic = (pm_diagnostic_t *) list->head; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
pm_serialize_diagnostic(parser, diagnostic, buffer);
}
}
// Serializes the encoding as its name: a u32 length followed by the name
// bytes (without a terminating NUL). The length is taken with strlen, so
// encoding->name must be a NUL-terminated string.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ lines look like the removed forms of the pm_ lines.
static void
yp_serialize_encoding(yp_encoding_t *encoding, yp_buffer_t *buffer) {
pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
size_t encoding_length = strlen(encoding->name);
yp_buffer_append_u32(buffer, yp_sizet_to_u32(encoding_length));
yp_buffer_append_str(buffer, encoding->name, encoding_length);
pm_buffer_append_u32(buffer, pm_sizet_to_u32(encoding_length));
pm_buffer_append_str(buffer, encoding->name, encoding_length);
}
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
void
yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
yp_serialize_encoding(&parser->encoding, buffer);
yp_serialize_comment_list(parser, &parser->comment_list, buffer);
yp_serialize_diagnostic_list(parser, &parser->error_list, buffer);
yp_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_serialize_encoding(&parser->encoding, buffer);
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
// Here we're going to leave space for the offset of the constant pool in
// the buffer.
size_t offset = buffer->length;
yp_buffer_append_zeroes(buffer, 4);
pm_buffer_append_zeroes(buffer, 4);
// Next, encode the length of the constant pool.
yp_buffer_append_u32(buffer, parser->constant_pool.size);
pm_buffer_append_u32(buffer, parser->constant_pool.size);
// Now we're going to serialize the content of the node.
yp_serialize_node(parser, node, buffer);
pm_serialize_node(parser, node, buffer);
// Now we're going to serialize the offset of the constant pool back where
// we left space for it.
uint32_t length = yp_sizet_to_u32(buffer->length);
uint32_t length = pm_sizet_to_u32(buffer->length);
memcpy(buffer->value + offset, &length, sizeof(uint32_t));
// Now we're going to serialize the constant pool.
offset = buffer->length;
yp_buffer_append_zeroes(buffer, parser->constant_pool.size * 8);
pm_buffer_append_zeroes(buffer, parser->constant_pool.size * 8);
yp_constant_t *constant;
pm_constant_t *constant;
for (uint32_t index = 0; index < parser->constant_pool.capacity; index++) {
constant = &parser->constant_pool.constants[index];
@ -218,82 +218,82 @@ yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer)
// effectively in place of the source offset, we have a buffer
// offset. We will add a leading 1 to indicate that this is a
// buffer offset.
uint32_t content_offset = yp_sizet_to_u32(buffer->length);
uint32_t content_offset = pm_sizet_to_u32(buffer->length);
uint32_t owned_mask = (uint32_t) (1 << 31);
assert(content_offset < owned_mask);
content_offset |= owned_mask;
memcpy(buffer->value + buffer_offset, &content_offset, 4);
yp_buffer_append_bytes(buffer, constant->start, constant->length);
pm_buffer_append_bytes(buffer, constant->start, constant->length);
} else {
// Since this is a shared constant, we are going to write its
// source offset directly into the buffer.
uint32_t source_offset = yp_ptrdifft_to_u32(constant->start - parser->start);
uint32_t source_offset = pm_ptrdifft_to_u32(constant->start - parser->start);
memcpy(buffer->value + buffer_offset, &source_offset, 4);
}
// Now we can write the length of the constant into the buffer.
uint32_t constant_length = yp_sizet_to_u32(constant->length);
uint32_t constant_length = pm_sizet_to_u32(constant->length);
memcpy(buffer->value + buffer_offset + 4, &constant_length, 4);
}
}
}
// Lex callback that serializes one token into the buffer passed through the
// opaque `data` pointer. Four u32 values are appended per token: the token
// type, the offset of token->start from parser->start, the token length
// (token->end - token->start), and the parser's lex_state as read when the
// callback runs.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the yp_ lines look like the removed forms of the pm_ lines.
static void
serialize_token(void *data, yp_parser_t *parser, yp_token_t *token) {
yp_buffer_t *buffer = (yp_buffer_t *) data;
serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
pm_buffer_t *buffer = (pm_buffer_t *) data;
yp_buffer_append_u32(buffer, token->type);
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(token->start - parser->start));
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(token->end - token->start));
yp_buffer_append_u32(buffer, parser->lex_state);
pm_buffer_append_u32(buffer, token->type);
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(token->start - parser->start));
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(token->end - token->start));
pm_buffer_append_u32(buffer, parser->lex_state);
}
// Lexes the given source and serializes the tokens to the given buffer.
//
// A parser is initialized over `source`/`size`/`filepath` with serialize_token
// installed as its lex callback, so tokens are appended to `buffer` while the
// parse runs. After the token stream a single 0 byte is written, followed by
// the encoding, the comment list, the error list and the warning list. The
// AST produced by the parse is not serialized; the node and the parser are
// released before returning.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the YP_/yp_ lines look like the removed forms of the PRISM_/pm_
// lines.
YP_EXPORTED_FUNCTION void
yp_lex_serialize(const uint8_t *source, size_t size, const char *filepath, yp_buffer_t *buffer) {
yp_parser_t parser;
yp_parser_init(&parser, source, size, filepath);
PRISM_EXPORTED_FUNCTION void
pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer) {
pm_parser_t parser;
pm_parser_init(&parser, source, size, filepath);
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
.data = (void *) buffer,
.callback = serialize_token,
};
// lex_callback lives on this stack frame; the parser only borrows the pointer
// for the duration of the parse call below.
parser.lex_callback = &lex_callback;
yp_node_t *node = yp_parse(&parser);
pm_node_t *node = pm_parse(&parser);
// Append 0 to mark end of tokens
yp_buffer_append_u8(buffer, 0);
pm_buffer_append_u8(buffer, 0);
yp_serialize_encoding(&parser.encoding, buffer);
yp_serialize_comment_list(&parser, &parser.comment_list, buffer);
yp_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
yp_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
pm_serialize_encoding(&parser.encoding, buffer);
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
yp_node_destroy(&parser, node);
yp_parser_free(&parser);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
}
// Parse and serialize both the AST and the tokens represented by the given
// source to the given buffer.
//
// The parser is initialized with a NULL filepath; if `metadata` is non-NULL it
// is applied to the parser before parsing. Tokens are appended to `buffer` by
// the serialize_token lex callback while the parse runs, then a single 0 byte
// is written after the token stream, then the parse result is serialized via
// the *_serialize call. The node and the parser are released before
// returning.
//
// NOTE(review): this span appears to be a rename diff rendered without +/-
// markers; the YP_/yp_ lines look like the removed forms of the PRISM_/pm_
// lines.
YP_EXPORTED_FUNCTION void
yp_parse_lex_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
yp_parser_t parser;
yp_parser_init(&parser, source, size, NULL);
if (metadata) yp_parser_metadata(&parser, metadata);
PRISM_EXPORTED_FUNCTION void
pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) {
pm_parser_t parser;
pm_parser_init(&parser, source, size, NULL);
if (metadata) pm_parser_metadata(&parser, metadata);
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
.data = (void *) buffer,
.callback = serialize_token,
};
parser.lex_callback = &lex_callback;
yp_node_t *node = yp_parse(&parser);
pm_node_t *node = pm_parse(&parser);
yp_buffer_append_u8(buffer, 0);
yp_serialize(&parser, node, buffer);
pm_buffer_append_u8(buffer, 0);
pm_serialize(&parser, node, buffer);
yp_node_destroy(&parser, node);
yp_parser_free(&parser);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
}

Просмотреть файл

@ -1,17 +1,17 @@
#include <string.h>
#include "yarp/ast.h"
#include "prism/ast.h"
// Returns a string representation of the given token type.
YP_EXPORTED_FUNCTION const char *
yp_token_type_to_str(yp_token_type_t token_type)
PRISM_EXPORTED_FUNCTION const char *
pm_token_type_to_str(pm_token_type_t token_type)
{
switch (token_type) {
<%- tokens.each do |token| -%>
case YP_TOKEN_<%= token.name %>:
case PM_TOKEN_<%= token.name %>:
return "<%= token.name %>";
<%- end -%>
case YP_TOKEN_MAXIMUM:
case PM_TOKEN_MAXIMUM:
return "MAXIMUM";
}
return "\0";

Просмотреть файл

@ -4,12 +4,12 @@ require "erb"
require "fileutils"
require "yaml"
module YARP
module Prism
COMMON_FLAGS = 2
SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("YARP_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)
SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)
JAVA_BACKEND = ENV["YARP_JAVA_BACKEND"] || "truffleruby"
JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby"
JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? "org.jruby.RubySymbol" : "String"
# This represents a field on a node. It contains all of the necessary
@ -35,9 +35,9 @@ module YARP
class NodeKindField < Field
def c_type
if options[:kind]
"yp_#{options[:kind].gsub(/(?<=.)[A-Z]/, "_\\0").downcase}"
"pm_#{options[:kind].gsub(/(?<=.)[A-Z]/, "_\\0").downcase}"
else
"yp_node"
"pm_node"
end
end
@ -201,7 +201,7 @@ module YARP
@name = config.fetch("name")
type = @name.gsub(/(?<=.)[A-Z]/, "_\\0")
@type = "YP_#{type.upcase}"
@type = "PM_#{type.upcase}"
@human = type.downcase
@fields =
@ -255,7 +255,7 @@ module YARP
def declaration
output = []
output << "YP_TOKEN_#{name}"
output << "PM_TOKEN_#{name}"
output << " = #{value}" if value
output << ", // #{comment}"
output.join
@ -361,18 +361,18 @@ module YARP
end
TEMPLATES = [
"ext/yarp/api_node.c",
"include/yarp/ast.h",
"java/org/yarp/Loader.java",
"java/org/yarp/Nodes.java",
"java/org/yarp/AbstractNodeVisitor.java",
"lib/yarp/compiler.rb",
"lib/yarp/dispatcher.rb",
"lib/yarp/dsl.rb",
"lib/yarp/mutation_compiler.rb",
"lib/yarp/node.rb",
"lib/yarp/serialize.rb",
"lib/yarp/visitor.rb",
"ext/prism/api_node.c",
"include/prism/ast.h",
"java/org/prism/Loader.java",
"java/org/prism/Nodes.java",
"java/org/prism/AbstractNodeVisitor.java",
"lib/prism/compiler.rb",
"lib/prism/dispatcher.rb",
"lib/prism/dsl.rb",
"lib/prism/mutation_compiler.rb",
"lib/prism/node.rb",
"lib/prism/serialize.rb",
"lib/prism/visitor.rb",
"src/node.c",
"src/prettyprint.c",
"src/serialize.c",
@ -382,9 +382,9 @@ end
# Script entry point: this only runs when the file is executed directly, not
# when it is required as a library.
if __FILE__ == $0
# With no command-line arguments, render every path listed in TEMPLATES.
if ARGV.empty?
YARP::TEMPLATES.each { |filepath| YARP.template(filepath) }
Prism::TEMPLATES.each { |filepath| Prism.template(filepath) }
else # ruby/ruby
# Otherwise the first argument names a single template and the second is
# forwarded as the write_to: destination (nil when only one argument is
# given). NOTE(review): the marker above suggests this form is the one
# driven by the ruby/ruby build -- confirm.
name, write_to = ARGV
YARP.template(name, write_to: write_to)
Prism.template(name, write_to: write_to)
end
end

Просмотреть файл

@ -1,13 +1,13 @@
#include "yarp.h"
#include "prism.h"
/******************************************************************************/
/* Character checks */
/******************************************************************************/
static inline bool
yp_char_is_hexadecimal_digits(const uint8_t *string, size_t length) {
pm_char_is_hexadecimal_digits(const uint8_t *string, size_t length) {
for (size_t index = 0; index < length; index++) {
if (!yp_char_is_hexadecimal_digit(string[index])) {
if (!pm_char_is_hexadecimal_digit(string[index])) {
return false;
}
}
@ -18,7 +18,7 @@ yp_char_is_hexadecimal_digits(const uint8_t *string, size_t length) {
// expensive to go through the indirection of the function pointer. Instead we
// provide a fast path that will check if we can just return 1.
static inline size_t
yp_char_width(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
pm_char_width(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
if (parser->encoding_changed || (*start >= 0x80)) {
return parser->encoding.char_width(start, end - start);
} else {
@ -71,11 +71,11 @@ char_is_ascii_printable(const uint8_t b) {
static inline size_t
unescape_octal(const uint8_t *backslash, uint8_t *value, const uint8_t *end) {
*value = (uint8_t) (backslash[1] - '0');
if (backslash + 2 >= end || !yp_char_is_octal_digit(backslash[2])) {
if (backslash + 2 >= end || !pm_char_is_octal_digit(backslash[2])) {
return 2;
}
*value = (uint8_t) ((*value << 3) | (backslash[2] - '0'));
if (backslash + 3 >= end || !yp_char_is_octal_digit(backslash[3])) {
if (backslash + 3 >= end || !pm_char_is_octal_digit(backslash[3])) {
return 3;
}
*value = (uint8_t) ((*value << 3) | (backslash[3] - '0'));
@ -91,14 +91,14 @@ unescape_hexadecimal_digit(const uint8_t value) {
// Scan the 1-2 digits of hexadecimal into the value. Returns the number of
// digits scanned.
static inline size_t
unescape_hexadecimal(const uint8_t *backslash, uint8_t *value, const uint8_t *end, yp_list_t *error_list) {
unescape_hexadecimal(const uint8_t *backslash, uint8_t *value, const uint8_t *end, pm_list_t *error_list) {
*value = 0;
if (backslash + 2 >= end || !yp_char_is_hexadecimal_digit(backslash[2])) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_HEXADECIMAL);
if (backslash + 2 >= end || !pm_char_is_hexadecimal_digit(backslash[2])) {
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
return 2;
}
*value = unescape_hexadecimal_digit(backslash[2]);
if (backslash + 3 >= end || !yp_char_is_hexadecimal_digit(backslash[3])) {
if (backslash + 3 >= end || !pm_char_is_hexadecimal_digit(backslash[3])) {
return 3;
}
*value = (uint8_t) ((*value << 4) | unescape_hexadecimal_digit(backslash[3]));
@ -121,7 +121,7 @@ unescape_unicode(const uint8_t *string, size_t length, uint32_t *value) {
// 32-bit value to write. Writes the UTF-8 representation of the value to the
// string and returns the number of bytes written.
static inline size_t
unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, const uint8_t *end, yp_list_t *error_list) {
unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, const uint8_t *end, pm_list_t *error_list) {
if (value <= 0x7F) {
// 0xxxxxxx
dest[0] = (uint8_t) value;
@ -157,7 +157,7 @@ unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, cons
// If we get here, then the value is too big. This is an error, but we don't
// want to just crash, so instead we'll add an error to the error list and put
// in a replacement character instead.
if (error_list) yp_diagnostic_list_append(error_list, start, end, YP_ERR_ESCAPE_INVALID_UNICODE);
if (error_list) pm_diagnostic_list_append(error_list, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
dest[0] = 0xEF;
dest[1] = 0xBF;
dest[2] = 0xBD;
@ -165,20 +165,20 @@ unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, cons
}
// Bit flags threaded through unescape() and unescape_char(). Every non-zero
// value occupies its own bit, so flags are combined with | and tested with &.
//   CONTROL:       unescape_char() masks the character with 0x1f.
//   META:          unescape_char() sets bit 0x80 on the character.
//   EXPECT_SINGLE: a \u{...} escape that holds more than one codepoint is
//                  reported as an error by unescape().
typedef enum {
YP_UNESCAPE_FLAG_NONE = 0,
YP_UNESCAPE_FLAG_CONTROL = 1,
YP_UNESCAPE_FLAG_META = 2,
YP_UNESCAPE_FLAG_EXPECT_SINGLE = 4
} yp_unescape_flag_t;
PM_UNESCAPE_FLAG_NONE = 0,
PM_UNESCAPE_FLAG_CONTROL = 1,
PM_UNESCAPE_FLAG_META = 2,
PM_UNESCAPE_FLAG_EXPECT_SINGLE = 4
} pm_unescape_flag_t;
// Unescape a single character value based on the given flags.
static inline uint8_t
unescape_char(uint8_t value, const uint8_t flags) {
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
if (flags & PM_UNESCAPE_FLAG_CONTROL) {
value &= 0x1f;
}
if (flags & YP_UNESCAPE_FLAG_META) {
if (flags & PM_UNESCAPE_FLAG_META) {
value |= 0x80;
}
@ -188,13 +188,13 @@ unescape_char(uint8_t value, const uint8_t flags) {
// Read a specific escape sequence into the given destination.
static const uint8_t *
unescape(
yp_parser_t *parser,
pm_parser_t *parser,
uint8_t *dest,
size_t *dest_length,
const uint8_t *backslash,
const uint8_t *end,
const uint8_t flags,
yp_list_t *error_list
pm_list_t *error_list
) {
switch (backslash[1]) {
case 'a':
@ -234,8 +234,8 @@ unescape(
// \u{nnnn ...} Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
case 'u': {
if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS);
if ((flags & PM_UNESCAPE_FLAG_CONTROL) | (flags & PM_UNESCAPE_FLAG_META)) {
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS);
return backslash + 2;
}
@ -244,26 +244,26 @@ unescape(
const uint8_t *extra_codepoints_start = NULL;
int codepoints_count = 0;
unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
unicode_cursor += pm_strspn_whitespace(unicode_cursor, end - unicode_cursor);
while ((unicode_cursor < end) && (*unicode_cursor != '}')) {
const uint8_t *unicode_start = unicode_cursor;
size_t hexadecimal_length = yp_strspn_hexadecimal_digit(unicode_cursor, end - unicode_cursor);
size_t hexadecimal_length = pm_strspn_hexadecimal_digit(unicode_cursor, end - unicode_cursor);
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
if (hexadecimal_length > 6) {
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, YP_ERR_ESCAPE_INVALID_UNICODE_LONG);
if (error_list) pm_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
}
// there are no hexadecimal characters
else if (hexadecimal_length == 0) {
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, YP_ERR_ESCAPE_INVALID_UNICODE);
if (error_list) pm_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
return unicode_cursor;
}
unicode_cursor += hexadecimal_length;
codepoints_count++;
if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count == 2)
if (flags & PM_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count == 2)
extra_codepoints_start = unicode_start;
uint32_t value;
@ -272,23 +272,23 @@ unescape(
*dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, error_list);
}
unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
unicode_cursor += pm_strspn_whitespace(unicode_cursor, end - unicode_cursor);
}
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1) {
if (error_list) yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, YP_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
if (flags & PM_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1) {
if (error_list) pm_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
}
if (unicode_cursor < end && *unicode_cursor == '}') {
unicode_cursor++;
} else {
if (error_list) yp_diagnostic_list_append(error_list, backslash, unicode_cursor, YP_ERR_ESCAPE_INVALID_UNICODE_TERM);
if (error_list) pm_diagnostic_list_append(error_list, backslash, unicode_cursor, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
}
return unicode_cursor;
}
else if ((backslash + 5) < end && yp_char_is_hexadecimal_digits(backslash + 2, 4)) {
else if ((backslash + 5) < end && pm_char_is_hexadecimal_digits(backslash + 2, 4)) {
uint32_t value;
unescape_unicode(backslash + 2, 4, &value);
@ -298,7 +298,7 @@ unescape(
return backslash + 6;
}
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_UNICODE);
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_UNICODE);
return backslash + 2;
}
// \c\M-x meta control character, where x is an ASCII printable character
@ -306,18 +306,18 @@ unescape(
// \cx control character, where x is an ASCII printable character
case 'c':
if (backslash + 2 >= end) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL);
return end;
}
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
if (flags & PM_UNESCAPE_FLAG_CONTROL) {
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
return backslash + 2;
}
switch (backslash[2]) {
case '\\':
return unescape(parser, dest, dest_length, backslash + 2, end, flags | YP_UNESCAPE_FLAG_CONTROL, error_list);
return unescape(parser, dest, dest_length, backslash + 2, end, flags | PM_UNESCAPE_FLAG_CONTROL, error_list);
case '?':
if (dest) {
dest[(*dest_length)++] = unescape_char(0x7f, flags);
@ -325,12 +325,12 @@ unescape(
return backslash + 3;
default: {
if (!char_is_ascii_printable(backslash[2])) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL);
return backslash + 2;
}
if (dest) {
dest[(*dest_length)++] = unescape_char(backslash[2], flags | YP_UNESCAPE_FLAG_CONTROL);
dest[(*dest_length)++] = unescape_char(backslash[2], flags | PM_UNESCAPE_FLAG_CONTROL);
}
return backslash + 3;
}
@ -339,23 +339,23 @@ unescape(
// \C-? delete, ASCII 7Fh (DEL)
case 'C':
if (backslash + 3 >= end) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL);
return end;
}
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
if (flags & PM_UNESCAPE_FLAG_CONTROL) {
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
return backslash + 2;
}
if (backslash[2] != '-') {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL);
return backslash + 2;
}
switch (backslash[3]) {
case '\\':
return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_CONTROL, error_list);
return unescape(parser, dest, dest_length, backslash + 3, end, flags | PM_UNESCAPE_FLAG_CONTROL, error_list);
case '?':
if (dest) {
dest[(*dest_length)++] = unescape_char(0x7f, flags);
@ -363,12 +363,12 @@ unescape(
return backslash + 4;
default:
if (!char_is_ascii_printable(backslash[3])) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_CONTROL);
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_CONTROL);
return backslash + 2;
}
if (dest) {
dest[(*dest_length)++] = unescape_char(backslash[3], flags | YP_UNESCAPE_FLAG_CONTROL);
dest[(*dest_length)++] = unescape_char(backslash[3], flags | PM_UNESCAPE_FLAG_CONTROL);
}
return backslash + 4;
}
@ -377,32 +377,32 @@ unescape(
// \M-x meta character, where x is an ASCII printable character
case 'M': {
if (backslash + 3 >= end) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_META);
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_META);
return end;
}
if (flags & YP_UNESCAPE_FLAG_META) {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META_REPEAT);
if (flags & PM_UNESCAPE_FLAG_META) {
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_META_REPEAT);
return backslash + 2;
}
if (backslash[2] != '-') {
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META);
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_META);
return backslash + 2;
}
if (backslash[3] == '\\') {
return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_META, error_list);
return unescape(parser, dest, dest_length, backslash + 3, end, flags | PM_UNESCAPE_FLAG_META, error_list);
}
if (char_is_ascii_printable(backslash[3])) {
if (dest) {
dest[(*dest_length)++] = unescape_char(backslash[3], flags | YP_UNESCAPE_FLAG_META);
dest[(*dest_length)++] = unescape_char(backslash[3], flags | PM_UNESCAPE_FLAG_META);
}
return backslash + 4;
}
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META);
if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_META);
return backslash + 3;
}
// \n
@ -416,7 +416,7 @@ unescape(
/* fallthrough */
// In this case we're escaping something that doesn't need escaping.
default: {
size_t width = yp_char_width(parser, backslash + 1, end);
size_t width = pm_char_width(parser, backslash + 1, end);
if (dest) {
memcpy(dest + *dest_length, backslash + 1, width);
@ -457,13 +457,13 @@ unescape(
// \c? or \C-? delete, ASCII 7Fh (DEL)
//
static void
yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
if (unescape_type == YP_UNESCAPE_NONE) {
pm_unescape_manipulate_string_or_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type, bool expect_single_codepoint) {
if (unescape_type == PM_UNESCAPE_NONE) {
// If we're not unescaping then we can reference the source directly.
return;
}
const uint8_t *backslash = yp_memchr(string->source, '\\', string->length, parser->encoding_changed, &parser->encoding);
const uint8_t *backslash = pm_memchr(string->source, '\\', string->length, parser->encoding_changed, &parser->encoding);
if (backslash == NULL) {
// Here there are no escapes, so we can reference the source directly.
@ -474,7 +474,7 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
// within the string.
uint8_t *allocated = malloc(string->length);
if (allocated == NULL) {
yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, YP_ERR_MALLOC_FAILED);
pm_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, PM_ERR_MALLOC_FAILED);
return;
}
@ -509,17 +509,17 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
cursor = backslash + 2;
break;
default:
if (unescape_type == YP_UNESCAPE_WHITESPACE) {
if (unescape_type == PM_UNESCAPE_WHITESPACE) {
if (backslash[1] == '\r' && backslash[2] == '\n') {
cursor = backslash + 2;
break;
}
if (yp_strspn_whitespace(backslash + 1, 1)) {
if (pm_strspn_whitespace(backslash + 1, 1)) {
cursor = backslash + 1;
break;
}
}
if (unescape_type == YP_UNESCAPE_WHITESPACE || unescape_type == YP_UNESCAPE_MINIMAL) {
if (unescape_type == PM_UNESCAPE_WHITESPACE || unescape_type == PM_UNESCAPE_MINIMAL) {
// In this case we're escaping something that doesn't need escaping.
dest[dest_length++] = '\\';
cursor = backslash + 1;
@ -528,11 +528,11 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
// This is the only type of unescaping left. In this case we need to
// handle all of the different unescapes.
assert(unescape_type == YP_UNESCAPE_ALL);
assert(unescape_type == PM_UNESCAPE_ALL);
uint8_t flags = YP_UNESCAPE_FLAG_NONE;
uint8_t flags = PM_UNESCAPE_FLAG_NONE;
if (expect_single_codepoint) {
flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
flags |= PM_UNESCAPE_FLAG_EXPECT_SINGLE;
}
cursor = unescape(parser, dest, &dest_length, backslash, end, flags, &parser->error_list);
@ -540,7 +540,7 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
}
if (end > cursor) {
backslash = yp_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, &parser->encoding);
backslash = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, &parser->encoding);
} else {
backslash = NULL;
}
@ -555,30 +555,30 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
// If the string was already allocated, then we need to free that memory
// here. That's because we're about to override it with the escaped string.
yp_string_free(string);
pm_string_free(string);
// We also need to update the length at the end. This is because every escape
// reduces the length of the final string, and we don't want garbage at the
// end.
yp_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
pm_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
}
// Exported entry point that unescapes the given string according to
// unescape_type. It forwards to the shared string-or-char-literal
// implementation with expect_single_codepoint set to false.
YP_EXPORTED_FUNCTION void
yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
yp_unescape_manipulate_string_or_char_literal(parser, string, unescape_type, false);
PRISM_EXPORTED_FUNCTION void
pm_unescape_manipulate_string(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type) {
pm_unescape_manipulate_string_or_char_literal(parser, string, unescape_type, false);
}
// Character-literal variant of the string unescaper. It forwards to the
// shared string-or-char-literal implementation with expect_single_codepoint
// set to true. Unlike the string variant it is not marked as exported.
void
yp_unescape_manipulate_char_literal(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
yp_unescape_manipulate_string_or_char_literal(parser, string, unescape_type, true);
pm_unescape_manipulate_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type) {
pm_unescape_manipulate_string_or_char_literal(parser, string, unescape_type, true);
}
// This function is similar to yp_unescape_manipulate_string, except it doesn't
// This function is similar to pm_unescape_manipulate_string, except it doesn't
// actually perform any string manipulations. Instead, it calculates how long
// the unescaped character is, and returns that value
size_t
yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
assert(unescape_type != YP_UNESCAPE_NONE);
pm_unescape_calculate_difference(pm_parser_t *parser, const uint8_t *backslash, pm_unescape_type_t unescape_type, bool expect_single_codepoint) {
assert(unescape_type != PM_UNESCAPE_NONE);
if (backslash + 1 >= parser->end) {
return 0;
@ -589,26 +589,26 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash,
case '\'':
return 2;
default: {
if (unescape_type == YP_UNESCAPE_WHITESPACE) {
if (unescape_type == PM_UNESCAPE_WHITESPACE) {
if (backslash[1] == '\r' && backslash[2] == '\n') {
return 2;
}
size_t whitespace = yp_strspn_whitespace(backslash + 1, 1);
size_t whitespace = pm_strspn_whitespace(backslash + 1, 1);
if (whitespace > 0) {
return whitespace;
}
}
if (unescape_type == YP_UNESCAPE_WHITESPACE || unescape_type == YP_UNESCAPE_MINIMAL) {
return 1 + yp_char_width(parser, backslash + 1, parser->end);
if (unescape_type == PM_UNESCAPE_WHITESPACE || unescape_type == PM_UNESCAPE_MINIMAL) {
return 1 + pm_char_width(parser, backslash + 1, parser->end);
}
// This is the only type of unescaping left. In this case we need to
// handle all of the different unescapes.
assert(unescape_type == YP_UNESCAPE_ALL);
assert(unescape_type == PM_UNESCAPE_ALL);
uint8_t flags = YP_UNESCAPE_FLAG_NONE;
uint8_t flags = PM_UNESCAPE_FLAG_NONE;
if (expect_single_codepoint) {
flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
flags |= PM_UNESCAPE_FLAG_EXPECT_SINGLE;
}
const uint8_t *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, NULL);
@ -622,16 +622,16 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash,
// This is one of the main entry points into the extension. It accepts a source
// string, a type of unescaping, and a pointer to a result string. It returns a
// boolean indicating whether or not the unescaping was successful.
//
// A temporary parser is initialized over the source bytes for the duration of
// the call and freed before returning. "Successful" means that parser's error
// list is empty once unescaping has finished.
YP_EXPORTED_FUNCTION bool
yp_unescape_string(const uint8_t *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
yp_parser_t parser;
yp_parser_init(&parser, start, length, NULL);
PRISM_EXPORTED_FUNCTION bool
pm_unescape_string(const uint8_t *start, size_t length, pm_unescape_type_t unescape_type, pm_string_t *result) {
pm_parser_t parser;
pm_parser_init(&parser, start, length, NULL);
// result starts out spanning [start, start + length) of the input; the
// manipulate call below then processes it according to unescape_type.
yp_string_shared_init(result, start, start + length);
yp_unescape_manipulate_string(&parser, result, unescape_type);
pm_string_shared_init(result, start, start + length);
pm_unescape_manipulate_string(&parser, result, unescape_type);
// The error list is inspected before the parser is freed.
bool success = yp_list_empty_p(&parser.error_list);
yp_parser_free(&parser);
bool success = pm_list_empty_p(&parser.error_list);
pm_parser_free(&parser);
return success;
}

Просмотреть файл

@ -1,13 +1,13 @@
#ifndef YARP_UNESCAPE_H
#define YARP_UNESCAPE_H
#ifndef PRISM_UNESCAPE_H
#define PRISM_UNESCAPE_H
#include "yarp/defines.h"
#include "yarp/diagnostic.h"
#include "yarp/parser.h"
#include "yarp/util/yp_char.h"
#include "yarp/util/yp_list.h"
#include "yarp/util/yp_memchr.h"
#include "yarp/util/yp_string.h"
#include "prism/defines.h"
#include "prism/diagnostic.h"
#include "prism/parser.h"
#include "prism/util/pm_char.h"
#include "prism/util/pm_list.h"
#include "prism/util/pm_memchr.h"
#include "prism/util/pm_string.h"
#include <assert.h>
#include <stdbool.h>
@ -18,31 +18,31 @@
typedef enum {
// When we're creating a string inside of a list literal like %w, we
// shouldn't escape anything.
YP_UNESCAPE_NONE,
PM_UNESCAPE_NONE,
// When we're unescaping a single-quoted string, we only need to unescape
// single quotes and backslashes.
YP_UNESCAPE_MINIMAL,
PM_UNESCAPE_MINIMAL,
// When we're unescaping a string list, in addition to MINIMAL, we need to
// unescape whitespace.
YP_UNESCAPE_WHITESPACE,
PM_UNESCAPE_WHITESPACE,
// When we're unescaping a double-quoted string, we need to unescape all
// escapes.
YP_UNESCAPE_ALL,
} yp_unescape_type_t;
PM_UNESCAPE_ALL,
} pm_unescape_type_t;
// Unescape the contents of the given token into the given string using the given unescape mode.
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type);
void yp_unescape_manipulate_char_literal(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type);
PRISM_EXPORTED_FUNCTION void pm_unescape_manipulate_string(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
void pm_unescape_manipulate_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
// Accepts a source string and a type of unescaping and returns the unescaped version.
// The caller must yp_string_free(result); after calling this function.
YP_EXPORTED_FUNCTION bool yp_unescape_string(const uint8_t *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result);
// The caller must pm_string_free(result); after calling this function.
PRISM_EXPORTED_FUNCTION bool pm_unescape_string(const uint8_t *start, size_t length, pm_unescape_type_t unescape_type, pm_string_t *result);
// Returns the number of bytes that encompass the first escape sequence in the
// given string.
size_t yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *value, yp_unescape_type_t unescape_type, bool expect_single_codepoint);
size_t pm_unescape_calculate_difference(pm_parser_t *parser, const uint8_t *value, pm_unescape_type_t unescape_type, bool expect_single_codepoint);
#endif

Просмотреть файл

@ -1,38 +1,40 @@
#include "yarp/util/yp_buffer.h"
#include "prism/util/pm_buffer.h"
#define YP_BUFFER_INITIAL_SIZE 1024
#define PRISM_BUFFER_INITIAL_SIZE 1024
// Return the size of the yp_buffer_t struct.
// Return the size of the pm_buffer_t struct.
size_t
yp_buffer_sizeof(void) {
return sizeof(yp_buffer_t);
pm_buffer_sizeof(void) {
return sizeof(pm_buffer_t);
}
// Initialize a yp_buffer_t with its default values.
// Initialize a pm_buffer_t with its default values.
//
// Returns true when the initial allocation succeeded and false when malloc
// returned NULL. length and capacity are assigned before the allocation
// result is known, so capacity is non-zero even when false is returned.
bool
yp_buffer_init(yp_buffer_t *buffer) {
pm_buffer_init(pm_buffer_t *buffer) {
buffer->length = 0;
buffer->capacity = YP_BUFFER_INITIAL_SIZE;
buffer->capacity = PRISM_BUFFER_INITIAL_SIZE;
buffer->value = (char *) malloc(YP_BUFFER_INITIAL_SIZE);
buffer->value = (char *) malloc(PRISM_BUFFER_INITIAL_SIZE);
return buffer->value != NULL;
}
#undef PRISM_BUFFER_INITIAL_SIZE
// Return the value of the buffer.
char *
yp_buffer_value(yp_buffer_t *buffer) {
pm_buffer_value(pm_buffer_t *buffer) {
return buffer->value;
}
// Return the length of the buffer.
size_t
yp_buffer_length(yp_buffer_t *buffer) {
pm_buffer_length(pm_buffer_t *buffer) {
return buffer->length;
}
// Append the given amount of space to the buffer.
static inline void
yp_buffer_append_length(yp_buffer_t *buffer, size_t length) {
pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
size_t next_length = buffer->length + length;
if (next_length > buffer->capacity) {
@ -48,54 +50,54 @@ yp_buffer_append_length(yp_buffer_t *buffer, size_t length) {
// Append a generic pointer to memory to the buffer.
//
// Copies `length` bytes from `source` into the buffer after reserving room
// for them with the append_length helper.
static inline void
yp_buffer_append(yp_buffer_t *buffer, const void *source, size_t length) {
yp_buffer_append_length(buffer, length);
pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
pm_buffer_append_length(buffer, length);
// The destination is the final `length` bytes of the buffer. This assumes
// the append_length call above has already advanced buffer->length; its
// full body is not visible here -- TODO confirm.
memcpy(buffer->value + (buffer->length - length), source, length);
}
// Append the given amount of space as zeroes to the buffer.
void
yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length) {
yp_buffer_append_length(buffer, length);
pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) {
pm_buffer_append_length(buffer, length);
memset(buffer->value + (buffer->length - length), 0, length);
}
// Append a string to the buffer.
void
yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length) {
yp_buffer_append(buffer, value, length);
pm_buffer_append_str(pm_buffer_t *buffer, const char *value, size_t length) {
pm_buffer_append(buffer, value, length);
}
// Append a list of bytes to the buffer.
void
yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length) {
yp_buffer_append(buffer, (const char *) value, length);
pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length) {
pm_buffer_append(buffer, (const char *) value, length);
}
// Append a single byte to the buffer.
void
yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value) {
pm_buffer_append_u8(pm_buffer_t *buffer, uint8_t value) {
const void *source = &value;
yp_buffer_append(buffer, source, sizeof(uint8_t));
pm_buffer_append(buffer, source, sizeof(uint8_t));
}
// Append a 32-bit unsigned integer to the buffer.
//
// The value is written in a variable-length form (the same scheme as unsigned
// LEB128): values below 128 take a single byte, and larger values are emitted
// seven bits at a time, least significant group first. Every byte except the
// last has its high bit set to signal that more bytes follow.
void
yp_buffer_append_u32(yp_buffer_t *buffer, uint32_t value) {
pm_buffer_append_u32(pm_buffer_t *buffer, uint32_t value) {
if (value < 128) {
yp_buffer_append_u8(buffer, (uint8_t) value);
pm_buffer_append_u8(buffer, (uint8_t) value);
} else {
uint32_t n = value;
while (n >= 128) {
// The cast keeps the low seven bits of n plus the continuation bit.
yp_buffer_append_u8(buffer, (uint8_t) (n | 128));
pm_buffer_append_u8(buffer, (uint8_t) (n | 128));
n >>= 7;
}
// Final group: n is below 128 here, so the high bit is clear.
yp_buffer_append_u8(buffer, (uint8_t) n);
pm_buffer_append_u8(buffer, (uint8_t) n);
}
}
// Free the memory associated with the buffer.
void
yp_buffer_free(yp_buffer_t *buffer) {
pm_buffer_free(pm_buffer_t *buffer) {
free(buffer->value);
}

Просмотреть файл

@ -1,7 +1,7 @@
#ifndef YARP_BUFFER_H
#define YARP_BUFFER_H
#ifndef PRISM_BUFFER_H
#define PRISM_BUFFER_H
#include "yarp/defines.h"
#include "prism/defines.h"
#include <assert.h>
#include <stdbool.h>
@ -9,43 +9,43 @@
#include <stdlib.h>
#include <string.h>
// A yp_buffer_t is a simple memory buffer that stores data in a contiguous
// A pm_buffer_t is a simple memory buffer that stores data in a contiguous
// block of memory. It is used to store the serialized representation of a
// YARP tree.
// prism tree.
typedef struct {
// Start of the memory block (allocated with malloc by the init function
// and released with free by the free function).
char *value;
// Presumably the number of bytes currently in use; it starts at 0 in the
// init function -- confirm against the append helpers.
size_t length;
// Size of the block; the init function sets it to the size it requests
// from malloc.
size_t capacity;
} yp_buffer_t;
} pm_buffer_t;
// Return the size of the yp_buffer_t struct.
YP_EXPORTED_FUNCTION size_t yp_buffer_sizeof(void);
// Return the size of the pm_buffer_t struct.
PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
// Initialize a yp_buffer_t with its default values.
YP_EXPORTED_FUNCTION bool yp_buffer_init(yp_buffer_t *buffer);
// Initialize a pm_buffer_t with its default values.
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
// Return the value of the buffer.
YP_EXPORTED_FUNCTION char * yp_buffer_value(yp_buffer_t *buffer);
PRISM_EXPORTED_FUNCTION char * pm_buffer_value(pm_buffer_t *buffer);
// Return the length of the buffer.
YP_EXPORTED_FUNCTION size_t yp_buffer_length(yp_buffer_t *buffer);
PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(pm_buffer_t *buffer);
// Append the given amount of space as zeroes to the buffer.
void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length);
void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
// Append a string to the buffer.
void yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length);
void pm_buffer_append_str(pm_buffer_t *buffer, const char *value, size_t length);
// Append a list of bytes to the buffer.
void yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length);
void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
// Append a single byte to the buffer.
void yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value);
void pm_buffer_append_u8(pm_buffer_t *buffer, uint8_t value);
// Append a 32-bit unsigned integer to the buffer.
void yp_buffer_append_u32(yp_buffer_t *buffer, uint32_t value);
void pm_buffer_append_u32(pm_buffer_t *buffer, uint32_t value);
// Free the memory associated with the buffer.
YP_EXPORTED_FUNCTION void yp_buffer_free(yp_buffer_t *buffer);
PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
#endif

Просмотреть файл

@ -1,19 +1,19 @@
#include "yarp/util/yp_char.h"
#include "prism/util/pm_char.h"
#define YP_CHAR_BIT_WHITESPACE (1 << 0)
#define YP_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
#define YP_CHAR_BIT_REGEXP_OPTION (1 << 2)
#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)
#define YP_NUMBER_BIT_BINARY_DIGIT (1 << 0)
#define YP_NUMBER_BIT_BINARY_NUMBER (1 << 1)
#define YP_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
#define YP_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
#define YP_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
#define YP_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
#define YP_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
#define YP_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
#define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1)
#define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
#define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
#define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
#define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
static const uint8_t yp_byte_table[256] = {
static const uint8_t pm_byte_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@ -33,7 +33,7 @@ static const uint8_t yp_byte_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
};
static const uint8_t yp_number_table[256] = {
static const uint8_t pm_number_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
@ -54,36 +54,36 @@ static const uint8_t yp_number_table[256] = {
};
// Scan through the string and return the number of bytes at the start of the
// string whose entry in the byte table has at least one of the bits in kind
// set. Returns 0 if length is not positive, and never inspects more than length
// bytes because the bound is checked before each byte is read.
static inline size_t
yp_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
if (length <= 0) return 0;
size_t size = 0;
size_t maximum = (size_t) length;
while (size < maximum && (yp_byte_table[string[size]] & kind)) size++;
while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
return size;
}
// Returns the number of characters at the start of the string that are
// whitespace. Disallows searching past the given maximum number of characters.
size_t
yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE);
pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
}
// Returns the number of characters at the start of the string that are
// whitespace while also tracking the location of each newline. Disallows
// searching past the given maximum number of characters.
size_t
yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list) {
pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
if (length <= 0) return 0;
size_t size = 0;
size_t maximum = (size_t) length;
while (size < maximum && (yp_byte_table[string[size]] & YP_CHAR_BIT_WHITESPACE)) {
while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
if (string[size] == '\n') {
yp_newline_list_append(newline_list, string + size);
pm_newline_list_append(newline_list, string + size);
}
size++;
@ -95,45 +95,45 @@ yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newlin
// Returns the number of characters at the start of the string that are inline
// whitespace. Disallows searching past the given maximum number of characters.
size_t
yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE);
pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
}
// Returns the number of characters at the start of the string that are regexp
// options. Disallows searching past the given maximum number of characters.
size_t
yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION);
pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
}
// Returns true if the byte table entry for the given byte has at least one of
// the bits in kind set.
static inline bool
yp_char_is_char_kind(const uint8_t b, uint8_t kind) {
return (yp_byte_table[b] & kind) != 0;
pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
return (pm_byte_table[b] & kind) != 0;
}
// Returns true if the given character is a whitespace character.
bool
yp_char_is_whitespace(const uint8_t b) {
return yp_char_is_char_kind(b, YP_CHAR_BIT_WHITESPACE);
pm_char_is_whitespace(const uint8_t b) {
return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
}
// Returns true if the given character is an inline whitespace character.
bool
yp_char_is_inline_whitespace(const uint8_t b) {
return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
pm_char_is_inline_whitespace(const uint8_t b) {
return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
}
// Scan through the string and return the number of characters at the start of
// the string that match the given kind. Disallows searching past the given
// maximum number of characters.
static inline size_t
yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
if (length <= 0) return 0;
size_t size = 0;
size_t maximum = (size_t) length;
while (size < maximum && (yp_number_table[string[size]] & kind)) size++;
while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
return size;
}
@ -144,14 +144,14 @@ yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
// Additionally, report the location of the last invalid underscore character
// found in the string through the out invalid parameter.
static inline size_t
yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
if (length <= 0) return 0;
size_t size = 0;
size_t maximum = (size_t) length;
bool underscore = false;
while (size < maximum && (yp_number_table[string[size]] & kind)) {
while (size < maximum && (pm_number_table[string[size]] & kind)) {
if (string[size] == '_') {
if (underscore) *invalid = string + size;
underscore = true;
@ -174,8 +174,8 @@ yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const
// found at the end of the number, then the invalid pointer is set to the index
// of the first invalid underscore.
size_t
yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_BINARY_NUMBER);
pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
}
// Returns the number of characters at the start of the string that are octal
@ -186,15 +186,15 @@ yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t *
// found at the end of the number, then the invalid pointer is set to the index
// of the first invalid underscore.
size_t
yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_OCTAL_NUMBER);
pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
}
// Returns the number of characters at the start of the string that are decimal
// digits. Disallows searching past the given maximum number of characters.
size_t
yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT);
pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
}
// Returns the number of characters at the start of the string that are decimal
@ -205,16 +205,16 @@ yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
// found at the end of the number, then the invalid pointer is set to the index
// of the first invalid underscore.
size_t
yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_DECIMAL_NUMBER);
pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
}
// Returns the number of characters at the start of the string that are
// hexadecimal digits. Disallows searching past the given maximum number of
// characters.
size_t
yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
}
// Returns the number of characters at the start of the string that are
@ -225,48 +225,48 @@ yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
// found at the end of the number, then the invalid pointer is set to the index
// of the first invalid underscore.
size_t
yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
}
// Returns true if the number table entry for the given byte has at least one of
// the bits in kind set.
static inline bool
yp_char_is_number_kind(const uint8_t b, uint8_t kind) {
return (yp_number_table[b] & kind) != 0;
pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
return (pm_number_table[b] & kind) != 0;
}
// Returns true if the given character is a binary digit.
bool
yp_char_is_binary_digit(const uint8_t b) {
return yp_char_is_number_kind(b, YP_NUMBER_BIT_BINARY_DIGIT);
pm_char_is_binary_digit(const uint8_t b) {
return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
}
// Returns true if the given character is an octal digit.
bool
yp_char_is_octal_digit(const uint8_t b) {
return yp_char_is_number_kind(b, YP_NUMBER_BIT_OCTAL_DIGIT);
pm_char_is_octal_digit(const uint8_t b) {
return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
}
// Returns true if the given character is a decimal digit.
bool
yp_char_is_decimal_digit(const uint8_t b) {
return yp_char_is_number_kind(b, YP_NUMBER_BIT_DECIMAL_DIGIT);
pm_char_is_decimal_digit(const uint8_t b) {
return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
}
// Returns true if the given character is a hexadecimal digit.
bool
yp_char_is_hexadecimal_digit(const uint8_t b) {
return yp_char_is_number_kind(b, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
pm_char_is_hexadecimal_digit(const uint8_t b) {
return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
}
#undef YP_CHAR_BIT_WHITESPACE
#undef YP_CHAR_BIT_INLINE_WHITESPACE
#undef YP_CHAR_BIT_REGEXP_OPTION
#undef PRISM_CHAR_BIT_WHITESPACE
#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
#undef PRISM_CHAR_BIT_REGEXP_OPTION
#undef YP_NUMBER_BIT_BINARY_DIGIT
#undef YP_NUMBER_BIT_BINARY_NUMBER
#undef YP_NUMBER_BIT_OCTAL_DIGIT
#undef YP_NUMBER_BIT_OCTAL_NUMBER
#undef YP_NUMBER_BIT_DECIMAL_DIGIT
#undef YP_NUMBER_BIT_DECIMAL_NUMBER
#undef YP_NUMBER_BIT_HEXADECIMAL_NUMBER
#undef YP_NUMBER_BIT_HEXADECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_BINARY_DIGIT
#undef PRISM_NUMBER_BIT_BINARY_NUMBER
#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT

Просмотреть файл

@ -1,34 +1,34 @@
#ifndef YP_CHAR_H
#define YP_CHAR_H
#ifndef PRISM_CHAR_H
#define PRISM_CHAR_H
#include "yarp/defines.h"
#include "yarp/util/yp_newline_list.h"
#include "prism/defines.h"
#include "prism/util/pm_newline_list.h"
#include <stdbool.h>
#include <stddef.h>
// Returns the number of characters at the start of the string that are
// whitespace. Disallows searching past the given maximum number of characters.
size_t yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
// Returns the number of characters at the start of the string that are
// whitespace while also tracking the location of each newline. Disallows
// searching past the given maximum number of characters.
size_t
yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list);
pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
// Returns the number of characters at the start of the string that are inline
// whitespace. Disallows searching past the given maximum number of characters.
size_t yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
// Returns the number of characters at the start of the string that are decimal
// digits. Disallows searching past the given maximum number of characters.
size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
// Returns the number of characters at the start of the string that are
// hexadecimal digits. Disallows searching past the given maximum number of
// characters.
size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
// Returns the number of characters at the start of the string that are octal
// digits or underscores. Disallows searching past the given maximum number of
@ -37,7 +37,7 @@ size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to the index
// of the first invalid underscore.
size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns the number of characters at the start of the string that are decimal
// digits or underscores. Disallows searching past the given maximum number of
@ -46,7 +46,7 @@ size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uin
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to the index
// of the first invalid underscore.
size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns the number of characters at the start of the string that are
// hexadecimal digits or underscores. Disallows searching past the given maximum
@ -55,11 +55,11 @@ size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const u
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to the index
// of the first invalid underscore.
size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns the number of characters at the start of the string that are regexp
// options. Disallows searching past the given maximum number of characters.
size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
// Returns the number of characters at the start of the string that are binary
// digits or underscores. Disallows searching past the given maximum number of
@ -68,24 +68,24 @@ size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
// If multiple underscores are found in a row or if an underscore is
// found at the end of the number, then the invalid pointer is set to the index
// of the first invalid underscore.
size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns true if the given character is a whitespace character.
bool yp_char_is_whitespace(const uint8_t b);
bool pm_char_is_whitespace(const uint8_t b);
// Returns true if the given character is an inline whitespace character.
bool yp_char_is_inline_whitespace(const uint8_t b);
bool pm_char_is_inline_whitespace(const uint8_t b);
// Returns true if the given character is a binary digit.
bool yp_char_is_binary_digit(const uint8_t b);
bool pm_char_is_binary_digit(const uint8_t b);
// Returns true if the given character is an octal digit.
bool yp_char_is_octal_digit(const uint8_t b);
bool pm_char_is_octal_digit(const uint8_t b);
// Returns true if the given character is a decimal digit.
bool yp_char_is_decimal_digit(const uint8_t b);
bool pm_char_is_decimal_digit(const uint8_t b);
// Returns true if the given character is a hexadecimal digit.
bool yp_char_is_hexadecimal_digit(const uint8_t b);
bool pm_char_is_hexadecimal_digit(const uint8_t b);
#endif

Просмотреть файл

@ -1,8 +1,8 @@
#include "yarp/util/yp_constant_pool.h"
#include "prism/util/pm_constant_pool.h"
// Initialize a list of constant ids.
void
yp_constant_id_list_init(yp_constant_id_list_t *list) {
pm_constant_id_list_init(pm_constant_id_list_t *list) {
list->ids = NULL;
list->size = 0;
list->capacity = 0;
@ -11,10 +11,10 @@ yp_constant_id_list_init(yp_constant_id_list_t *list) {
// Append a constant id to a list of constant ids. Returns false if any
// potential reallocations fail.
bool
yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id) {
pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
if (list->size >= list->capacity) {
list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
list->ids = (yp_constant_id_t *) realloc(list->ids, sizeof(yp_constant_id_t) * list->capacity);
list->ids = (pm_constant_id_t *) realloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
if (list->ids == NULL) return false;
}
@ -24,7 +24,7 @@ yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id) {
// Checks if the current constant id list includes the given constant id.
bool
yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id) {
pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
for (size_t index = 0; index < list->size; index++) {
if (list->ids[index] == id) return true;
}
@ -33,13 +33,13 @@ yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id) {
// Get the memory size of a list of constant ids.
size_t
yp_constant_id_list_memsize(yp_constant_id_list_t *list) {
return sizeof(yp_constant_id_list_t) + (list->capacity * sizeof(yp_constant_id_t));
pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
}
// Free the memory associated with a list of constant ids.
void
yp_constant_id_list_free(yp_constant_id_list_t *list) {
pm_constant_id_list_free(pm_constant_id_list_t *list) {
if (list->ids != NULL) {
free(list->ids);
}
@ -48,7 +48,7 @@ yp_constant_id_list_free(yp_constant_id_list_t *list) {
// A relatively simple hash function (djb2) that is used to hash strings. We are
// optimizing here for simplicity and speed.
static inline uint32_t
yp_constant_pool_hash(const uint8_t *start, size_t length) {
pm_constant_pool_hash(const uint8_t *start, size_t length) {
// This is a prime number used as the initial value for the hash function.
uint32_t value = 5381;
@ -86,20 +86,20 @@ is_power_of_two(uint32_t size) {
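// Resize a constant pool by doubling its capacity and rehashing every existing
// constant into the new storage. Returns false if doubling the capacity would
// overflow or if the allocation fails.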
static inline bool
yp_constant_pool_resize(yp_constant_pool_t *pool) {
pm_constant_pool_resize(pm_constant_pool_t *pool) {
assert(is_power_of_two(pool->capacity));
uint32_t next_capacity = pool->capacity * 2;
if (next_capacity < pool->capacity) return false;
const uint32_t mask = next_capacity - 1;
yp_constant_t *next_constants = calloc(next_capacity, sizeof(yp_constant_t));
pm_constant_t *next_constants = calloc(next_capacity, sizeof(pm_constant_t));
if (next_constants == NULL) return false;
// For each constant in the current constant pool, rehash the content, find
// the index in the next constant pool, and insert it.
for (uint32_t index = 0; index < pool->capacity; index++) {
yp_constant_t *constant = &pool->constants[index];
pm_constant_t *constant = &pool->constants[index];
// If an id is set on this constant, then we know we have content here.
// In this case we need to insert it into the next constant pool.
@ -127,12 +127,12 @@ yp_constant_pool_resize(yp_constant_pool_t *pool) {
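// Initialize a new constant pool. The requested capacity is passed through
// next_power_of_two before allocating. Returns false if the requested capacity
// is too large or if the allocation fails.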
bool
yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity) {
pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
const uint32_t maximum = (~((uint32_t) 0));
if (capacity >= ((maximum / 2) + 1)) return false;
capacity = next_power_of_two(capacity);
pool->constants = calloc(capacity, sizeof(yp_constant_t));
pool->constants = calloc(capacity, sizeof(pm_constant_t));
if (pool->constants == NULL) return false;
pool->size = 0;
@ -141,18 +141,18 @@ yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity) {
}
// Insert a constant into a constant pool and return its id, or 0 if the pool
// needed to be resized and the resize failed.
static inline yp_constant_id_t
yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length, bool owned) {
static inline pm_constant_id_t
pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, bool owned) {
if (pool->size >= (pool->capacity / 4 * 3)) {
if (!yp_constant_pool_resize(pool)) return 0;
if (!pm_constant_pool_resize(pool)) return 0;
}
assert(is_power_of_two(pool->capacity));
const uint32_t mask = pool->capacity - 1;
uint32_t hash = yp_constant_pool_hash(start, length);
uint32_t hash = pm_constant_pool_hash(start, length);
uint32_t index = hash & mask;
yp_constant_t *constant;
pm_constant_t *constant;
while (constant = &pool->constants[index], constant->id != 0) {
// If there is a collision, then we need to check if the content is the
@ -186,7 +186,7 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t l
pool->size++;
assert(pool->size < ((uint32_t) (1 << 31)));
*constant = (yp_constant_t) {
*constant = (pm_constant_t) {
.id = (unsigned int) (pool->size & 0x7FFFFFFF),
.owned = owned,
.start = start,
@ -199,26 +199,26 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t l
// Insert a constant into a constant pool. Returns the id of the constant, or 0
// if any potential calls to resize fail.
yp_constant_id_t
yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
return yp_constant_pool_insert(pool, start, length, false);
pm_constant_id_t
pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
return pm_constant_pool_insert(pool, start, length, false);
}
// Insert a constant into a constant pool from memory that is now owned by the
// constant pool. Returns the id of the constant, or 0 if any potential calls to
// resize fail.
yp_constant_id_t
yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
return yp_constant_pool_insert(pool, start, length, true);
pm_constant_id_t
pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
return pm_constant_pool_insert(pool, start, length, true);
}
// Free the memory associated with a constant pool.
void
yp_constant_pool_free(yp_constant_pool_t *pool) {
pm_constant_pool_free(pm_constant_pool_t *pool) {
// For each constant in the current constant pool, free the contents if the
// contents are owned.
for (uint32_t index = 0; index < pool->capacity; index++) {
yp_constant_t *constant = &pool->constants[index];
pm_constant_t *constant = &pool->constants[index];
// If an id is set on this constant, then we know we have content here.
if (constant->id != 0 && constant->owned) {

Просмотреть файл

@ -3,10 +3,10 @@
// equality. This comparison ends up being much faster than strcmp, since it
// only requires a single integer comparison.
#ifndef YP_CONSTANT_POOL_H
#define YP_CONSTANT_POOL_H
#ifndef PRISM_CONSTANT_POOL_H
#define PRISM_CONSTANT_POOL_H
#include "yarp/defines.h"
#include "prism/defines.h"
#include <assert.h>
#include <stdbool.h>
@ -14,30 +14,30 @@
#include <stdlib.h>
#include <string.h>
typedef uint32_t yp_constant_id_t;
typedef uint32_t pm_constant_id_t;
typedef struct {
yp_constant_id_t *ids;
pm_constant_id_t *ids;
size_t size;
size_t capacity;
} yp_constant_id_list_t;
} pm_constant_id_list_t;
// Initialize a list of constant ids.
void yp_constant_id_list_init(yp_constant_id_list_t *list);
void pm_constant_id_list_init(pm_constant_id_list_t *list);
// Append a constant id to a list of constant ids. Returns false if any
// potential reallocations fail.
bool yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id);
bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
// Checks if the current constant id list includes the given constant id.
bool
yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id);
pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
// Get the memory size of a list of constant ids.
size_t yp_constant_id_list_memsize(yp_constant_id_list_t *list);
size_t pm_constant_id_list_memsize(pm_constant_id_list_t *list);
// Free the memory associated with a list of constant ids.
void yp_constant_id_list_free(yp_constant_id_list_t *list);
void pm_constant_id_list_free(pm_constant_id_list_t *list);
typedef struct {
unsigned int id: 31;
@ -45,30 +45,30 @@ typedef struct {
const uint8_t *start;
size_t length;
uint32_t hash;
} yp_constant_t;
} pm_constant_t;
typedef struct {
yp_constant_t *constants;
pm_constant_t *constants;
uint32_t size;
uint32_t capacity;
} yp_constant_pool_t;
} pm_constant_pool_t;
// Define an empty constant pool.
#define YP_CONSTANT_POOL_EMPTY ((yp_constant_pool_t) { .constants = NULL, .size = 0, .capacity = 0 })
#define PM_CONSTANT_POOL_EMPTY ((pm_constant_pool_t) { .constants = NULL, .size = 0, .capacity = 0 })
// Initialize a new constant pool with a given capacity.
bool yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity);
bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
// Insert a constant into a constant pool that is a slice of a source string.
// Returns the id of the constant, or 0 if any potential calls to resize fail.
yp_constant_id_t yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
// Insert a constant into a constant pool from memory that is now owned by the
// constant pool. Returns the id of the constant, or 0 if any potential calls to
// resize fail.
yp_constant_id_t yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
// Free the memory associated with a constant pool.
void yp_constant_pool_free(yp_constant_pool_t *pool);
void pm_constant_pool_free(pm_constant_pool_t *pool);
#endif

Просмотреть файл

@ -1,20 +1,20 @@
#include "yarp/util/yp_list.h"
#include "prism/util/pm_list.h"
// Returns true if the given list is empty.
YP_EXPORTED_FUNCTION bool
yp_list_empty_p(yp_list_t *list) {
PRISM_EXPORTED_FUNCTION bool
pm_list_empty_p(pm_list_t *list) {
return list->head == NULL;
}
// Returns the size of the list.
YP_EXPORTED_FUNCTION size_t
yp_list_size(yp_list_t *list) {
PRISM_EXPORTED_FUNCTION size_t
pm_list_size(pm_list_t *list) {
return list->size;
}
// Append a node to the given list.
void
yp_list_append(yp_list_t *list, yp_list_node_t *node) {
pm_list_append(pm_list_t *list, pm_list_node_t *node) {
if (list->head == NULL) {
list->head = node;
} else {
@ -26,10 +26,10 @@ yp_list_append(yp_list_t *list, yp_list_node_t *node) {
}
// Deallocate the internal state of the given list.
YP_EXPORTED_FUNCTION void
yp_list_free(yp_list_t *list) {
yp_list_node_t *node = list->head;
yp_list_node_t *next;
PRISM_EXPORTED_FUNCTION void
pm_list_free(pm_list_t *list) {
pm_list_node_t *node = list->head;
pm_list_node_t *next;
while (node != NULL) {
next = node->next;

Просмотреть файл

@ -5,30 +5,30 @@
// The linked list itself operates off a set of pointers. Because the pointers
// are not necessarily sequential, they can be of any size. We use this fact to
// allow the consumer of this linked list to extend the node struct to include
// any data they want. This is done by using the yp_list_node_t as the first
// any data they want. This is done by using the pm_list_node_t as the first
// member of the struct.
//
// For example, if we want to store a list of integers, we can do the following:
//
// typedef struct {
// yp_list_node_t node;
// pm_list_node_t node;
// int value;
// } yp_int_node_t;
// } pm_int_node_t;
//
// yp_list_t list = YP_LIST_EMPTY;
// yp_int_node_t *node = malloc(sizeof(yp_int_node_t));
// pm_list_t list = PM_LIST_EMPTY;
// pm_int_node_t *node = malloc(sizeof(pm_int_node_t));
// node->value = 5;
//
// yp_list_append(&list, &node->node);
// pm_list_append(&list, &node->node);
//
// The yp_list_t struct is used to represent the overall linked list. It
// The pm_list_t struct is used to represent the overall linked list. It
// contains a pointer to the head and tail of the list. This allows for easy
// iteration and appending of new nodes.
#ifndef YARP_LIST_H
#define YARP_LIST_H
#ifndef PRISM_LIST_H
#define PRISM_LIST_H
#include "yarp/defines.h"
#include "prism/defines.h"
#include <stdbool.h>
#include <stddef.h>
@ -36,32 +36,32 @@
#include <stdlib.h>
// This represents a node in the linked list.
typedef struct yp_list_node {
struct yp_list_node *next;
} yp_list_node_t;
typedef struct pm_list_node {
struct pm_list_node *next;
} pm_list_node_t;
// This represents the overall linked list. It keeps a pointer to the head and
// tail so that iteration is easy and pushing new nodes is easy.
typedef struct {
size_t size;
yp_list_node_t *head;
yp_list_node_t *tail;
} yp_list_t;
pm_list_node_t *head;
pm_list_node_t *tail;
} pm_list_t;
// This represents an empty list. It's used to initialize a stack-allocated list
// as opposed to a method call.
#define YP_LIST_EMPTY ((yp_list_t) { .size = 0, .head = NULL, .tail = NULL })
#define PM_LIST_EMPTY ((pm_list_t) { .size = 0, .head = NULL, .tail = NULL })
// Returns true if the given list is empty.
YP_EXPORTED_FUNCTION bool yp_list_empty_p(yp_list_t *list);
PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list);
// Returns the size of the list.
YP_EXPORTED_FUNCTION size_t yp_list_size(yp_list_t *list);
PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list);
// Append a node to the given list.
void yp_list_append(yp_list_t *list, yp_list_node_t *node);
void pm_list_append(pm_list_t *list, pm_list_node_t *node);
// Deallocate the internal state of the given list.
YP_EXPORTED_FUNCTION void yp_list_free(yp_list_t *list);
PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list);
#endif

Просмотреть файл

@ -1,13 +1,13 @@
#include "yarp/util/yp_memchr.h"
#include "prism/util/pm_memchr.h"
#define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
// We need to roll our own memchr to handle cases where the encoding changes and
// we need to search for a character in a buffer that could be the trailing byte
// of a multibyte character.
void *
yp_memchr(const void *memory, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding) {
if (encoding_changed && encoding->multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding) {
if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
const uint8_t *source = (const uint8_t *) memory;
size_t index = 0;
@ -29,3 +29,5 @@ yp_memchr(const void *memory, int character, size_t number, bool encoding_change
return memchr(memory, character, number);
}
}
#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM

Просмотреть файл

@ -1,14 +1,14 @@
#ifndef YP_MEMCHR_H
#define YP_MEMCHR_H
#ifndef PRISM_MEMCHR_H
#define PRISM_MEMCHR_H
#include "yarp/defines.h"
#include "yarp/enc/yp_encoding.h"
#include "prism/defines.h"
#include "prism/enc/pm_encoding.h"
#include <stddef.h>
// We need to roll our own memchr to handle cases where the encoding changes and
// we need to search for a character in a buffer that could be the trailing byte
// of a multibyte character.
void * yp_memchr(const void *source, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding);
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding);
#endif

Просмотреть файл

@ -1,9 +1,9 @@
#include "yarp/util/yp_newline_list.h"
#include "prism/util/pm_newline_list.h"
// Initialize a new newline list with the given capacity. Returns true if the
// allocation of the offsets succeeds, otherwise returns false.
bool
yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity) {
pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
if (list->offsets == NULL) return false;
@ -23,7 +23,7 @@ yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capac
// Append a new offset to the newline list. Returns true if the reallocation of
// the offsets succeeds (if one was necessary), otherwise returns false.
bool
yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) {
pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
if (list->size == list->capacity) {
size_t *original_offsets = list->offsets;
@ -46,17 +46,17 @@ yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) {
// Conditionally append a new offset to the newline list, if the value passed in is a newline.
bool
yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor) {
pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
if (*cursor != '\n') {
return true;
}
return yp_newline_list_append(list, cursor);
return pm_newline_list_append(list, cursor);
}
// Returns the line and column of the given offset, assuming we don't have any
// information about the previous index that we found.
static yp_line_column_t
yp_newline_list_line_column_search(yp_newline_list_t *list, size_t offset) {
static pm_line_column_t
pm_newline_list_line_column_search(pm_newline_list_t *list, size_t offset) {
size_t left = 0;
size_t right = list->size - 1;
@ -64,7 +64,7 @@ yp_newline_list_line_column_search(yp_newline_list_t *list, size_t offset) {
size_t mid = left + (right - left) / 2;
if (list->offsets[mid] == offset) {
return ((yp_line_column_t) { mid, 0 });
return ((pm_line_column_t) { mid, 0 });
}
if (list->offsets[mid] < offset) {
@ -74,13 +74,13 @@ yp_newline_list_line_column_search(yp_newline_list_t *list, size_t offset) {
}
}
return ((yp_line_column_t) { left - 1, offset - list->offsets[left - 1] });
return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
}
// Returns the line and column of the given offset, assuming we know the last
// index that we found.
static yp_line_column_t
yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
static pm_line_column_t
pm_newline_list_line_column_scan(pm_newline_list_t *list, size_t offset) {
if (offset > list->last_offset) {
size_t index = list->last_index;
while (index < list->size && list->offsets[index] < offset) {
@ -88,10 +88,10 @@ yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
}
if (index == list->size) {
return ((yp_line_column_t) { index - 1, offset - list->offsets[index - 1] });
return ((pm_line_column_t) { index - 1, offset - list->offsets[index - 1] });
}
return ((yp_line_column_t) { index, 0 });
return ((pm_line_column_t) { index, 0 });
} else {
size_t index = list->last_index;
while (index > 0 && list->offsets[index] > offset) {
@ -99,26 +99,26 @@ yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
}
if (index == 0) {
return ((yp_line_column_t) { 0, offset });
return ((pm_line_column_t) { 0, offset });
}
return ((yp_line_column_t) { index, offset - list->offsets[index - 1] });
return ((pm_line_column_t) { index, offset - list->offsets[index - 1] });
}
}
// Returns the line and column of the given offset. If the offset is not in the
// list, the line and column of the closest offset less than the given offset
// are returned.
yp_line_column_t
yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor) {
pm_line_column_t
pm_newline_list_line_column(pm_newline_list_t *list, const uint8_t *cursor) {
assert(cursor >= list->start);
size_t offset = (size_t) (cursor - list->start);
yp_line_column_t result;
pm_line_column_t result;
if (list->last_offset == 0) {
result = yp_newline_list_line_column_search(list, offset);
result = pm_newline_list_line_column_search(list, offset);
} else {
result = yp_newline_list_line_column_scan(list, offset);
result = pm_newline_list_line_column_scan(list, offset);
}
list->last_index = result.line;
@ -129,6 +129,6 @@ yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor) {
// Free the internal memory allocated for the newline list.
void
yp_newline_list_free(yp_newline_list_t *list) {
pm_newline_list_free(pm_newline_list_t *list) {
free(list->offsets);
}

Просмотреть файл

@ -6,10 +6,10 @@
// end column on every node in addition to the offsets that we already store,
// but that would be quite a lot of memory overhead.
#ifndef YP_NEWLINE_LIST_H
#define YP_NEWLINE_LIST_H
#ifndef PRISM_NEWLINE_LIST_H
#define PRISM_NEWLINE_LIST_H
#include "yarp/defines.h"
#include "prism/defines.h"
#include <assert.h>
#include <stdbool.h>
@ -27,35 +27,35 @@ typedef struct {
size_t last_offset;
size_t last_index;
} yp_newline_list_t;
} pm_newline_list_t;
// A line and column in a string.
typedef struct {
size_t line;
size_t column;
} yp_line_column_t;
} pm_line_column_t;
#define YP_NEWLINE_LIST_EMPTY ((yp_newline_list_t) { \
#define PM_NEWLINE_LIST_EMPTY ((pm_newline_list_t) { \
.start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \
})
// Initialize a new newline list with the given capacity. Returns true if the
// allocation of the offsets succeeds, otherwise returns false.
bool yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity);
bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
// Append a new offset to the newline list. Returns true if the reallocation of
// the offsets succeeds (if one was necessary), otherwise returns false.
bool yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor);
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
// Conditionally append a new offset to the newline list, if the value passed in is a newline.
bool yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor);
bool pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor);
// Returns the line and column of the given offset. If the offset is not in the
// list, the line and column of the closest offset less than the given offset
// are returned.
yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor);
pm_line_column_t pm_newline_list_line_column(pm_newline_list_t *list, const uint8_t *cursor);
// Free the internal memory allocated for the newline list.
void yp_newline_list_free(yp_newline_list_t *list);
void pm_newline_list_free(pm_newline_list_t *list);
#endif

Просмотреть файл

@ -1,19 +1,19 @@
#include "yarp/util/yp_state_stack.h"
#include "prism/util/pm_state_stack.h"
// Pushes a value onto the stack.
void
yp_state_stack_push(yp_state_stack_t *stack, bool value) {
pm_state_stack_push(pm_state_stack_t *stack, bool value) {
*stack = (*stack << 1) | (value & 1);
}
// Pops a value off the stack.
void
yp_state_stack_pop(yp_state_stack_t *stack) {
pm_state_stack_pop(pm_state_stack_t *stack) {
*stack >>= 1;
}
// Returns the value at the top of the stack.
bool
yp_state_stack_p(yp_state_stack_t *stack) {
pm_state_stack_p(pm_state_stack_t *stack) {
return *stack & 1;
}

Просмотреть файл

@ -1,24 +1,24 @@
#ifndef YP_STATE_STACK_H
#define YP_STATE_STACK_H
#ifndef PRISM_STATE_STACK_H
#define PRISM_STATE_STACK_H
#include "yarp/defines.h"
#include "prism/defines.h"
#include <stdbool.h>
#include <stdint.h>
// A struct that represents a stack of bools.
typedef uint32_t yp_state_stack_t;
typedef uint32_t pm_state_stack_t;
// Initializes the state stack to an empty stack.
#define YP_STATE_STACK_EMPTY ((yp_state_stack_t) 0)
#define PM_STATE_STACK_EMPTY ((pm_state_stack_t) 0)
// Pushes a value onto the stack.
void yp_state_stack_push(yp_state_stack_t *stack, bool value);
void pm_state_stack_push(pm_state_stack_t *stack, bool value);
// Pops a value off the stack.
void yp_state_stack_pop(yp_state_stack_t *stack);
void pm_state_stack_pop(pm_state_stack_t *stack);
// Returns the value at the top of the stack.
bool yp_state_stack_p(yp_state_stack_t *stack);
bool pm_state_stack_p(pm_state_stack_t *stack);
#endif

Просмотреть файл

@ -1,4 +1,4 @@
#include "yarp/util/yp_string.h"
#include "prism/util/pm_string.h"
// The following headers are necessary to read files using demand paging.
#ifdef _WIN32
@ -12,11 +12,11 @@
// Initialize a shared string that is based on initial input.
void
yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end) {
pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
assert(start <= end);
*string = (yp_string_t) {
.type = YP_STRING_SHARED,
*string = (pm_string_t) {
.type = PM_STRING_SHARED,
.source = start,
.length = (size_t) (end - start)
};
@ -24,9 +24,9 @@ yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *
// Initialize an owned string that is responsible for freeing allocated memory.
void
yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length) {
*string = (yp_string_t) {
.type = YP_STRING_OWNED,
pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
*string = (pm_string_t) {
.type = PM_STRING_OWNED,
.source = source,
.length = length
};
@ -34,18 +34,18 @@ yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length) {
// Initialize a constant string that doesn't own its memory source.
void
yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
*string = (yp_string_t) {
.type = YP_STRING_CONSTANT,
pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
*string = (pm_string_t) {
.type = PM_STRING_CONSTANT,
.source = (const uint8_t *) source,
.length = length
};
}
static void
yp_string_mapped_init_internal(yp_string_t *string, uint8_t *source, size_t length) {
*string = (yp_string_t) {
.type = YP_STRING_MAPPED,
pm_string_mapped_init_internal(pm_string_t *string, uint8_t *source, size_t length) {
*string = (pm_string_t) {
.type = PM_STRING_MAPPED,
.source = source,
.length = length
};
@ -53,9 +53,9 @@ yp_string_mapped_init_internal(yp_string_t *string, uint8_t *source, size_t leng
// Returns the memory size associated with the string.
size_t
yp_string_memsize(const yp_string_t *string) {
size_t size = sizeof(yp_string_t);
if (string->type == YP_STRING_OWNED) {
pm_string_memsize(const pm_string_t *string) {
size_t size = sizeof(pm_string_t);
if (string->type == PM_STRING_OWNED) {
size += string->length;
}
return size;
@ -64,39 +64,39 @@ yp_string_memsize(const yp_string_t *string) {
// Ensure the string is owned. If it is not, then reinitialize it as owned and
// copy over the previous source.
void
yp_string_ensure_owned(yp_string_t *string) {
if (string->type == YP_STRING_OWNED) return;
pm_string_ensure_owned(pm_string_t *string) {
if (string->type == PM_STRING_OWNED) return;
size_t length = yp_string_length(string);
const uint8_t *source = yp_string_source(string);
size_t length = pm_string_length(string);
const uint8_t *source = pm_string_source(string);
uint8_t *memory = malloc(length);
if (!memory) return;
yp_string_owned_init(string, memory, length);
pm_string_owned_init(string, memory, length);
memcpy((void *) string->source, source, length);
}
// Returns the length associated with the string.
YP_EXPORTED_FUNCTION size_t
yp_string_length(const yp_string_t *string) {
PRISM_EXPORTED_FUNCTION size_t
pm_string_length(const pm_string_t *string) {
return string->length;
}
// Returns the start pointer associated with the string.
YP_EXPORTED_FUNCTION const uint8_t *
yp_string_source(const yp_string_t *string) {
PRISM_EXPORTED_FUNCTION const uint8_t *
pm_string_source(const pm_string_t *string) {
return string->source;
}
// Free the associated memory of the given string.
YP_EXPORTED_FUNCTION void
yp_string_free(yp_string_t *string) {
PRISM_EXPORTED_FUNCTION void
pm_string_free(pm_string_t *string) {
void *memory = (void *) string->source;
if (string->type == YP_STRING_OWNED) {
if (string->type == PM_STRING_OWNED) {
free(memory);
} else if (string->type == YP_STRING_MAPPED && string->length) {
} else if (string->type == PM_STRING_MAPPED && string->length) {
#if defined(_WIN32)
UnmapViewOfFile(memory);
#else
@ -106,7 +106,7 @@ yp_string_free(yp_string_t *string) {
}
bool
yp_string_mapped_init(yp_string_t *string, const char *filepath) {
pm_string_mapped_init(pm_string_t *string, const char *filepath) {
#ifdef _WIN32
// Open the file for reading.
HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
@ -129,7 +129,7 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
if (file_size == 0) {
CloseHandle(file);
uint8_t empty[] = "";
yp_string_mapped_init_internal(string, empty, 0);
pm_string_mapped_init_internal(string, empty, 0);
return true;
}
@ -151,7 +151,7 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
return false;
}
yp_string_mapped_init_internal(string, source, (size_t) file_size);
pm_string_mapped_init_internal(string, source, (size_t) file_size);
return true;
#else
// Open the file for reading
@ -176,7 +176,7 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
if (size == 0) {
close(fd);
uint8_t empty[] = "";
yp_string_mapped_init_internal(string, empty, 0);
pm_string_mapped_init_internal(string, empty, 0);
return true;
}
@ -187,14 +187,14 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
}
close(fd);
yp_string_mapped_init_internal(string, source, size);
pm_string_mapped_init_internal(string, source, size);
return true;
#endif
}
// Returns the size of the yp_string_t struct. This is necessary to allocate the
// Returns the size of the pm_string_t struct. This is necessary to allocate the
// correct amount of memory in the FFI backend.
YP_EXPORTED_FUNCTION size_t
yp_string_sizeof(void) {
return sizeof(yp_string_t);
PRISM_EXPORTED_FUNCTION size_t
pm_string_sizeof(void) {
return sizeof(pm_string_t);
}

Просмотреть файл

@ -1,7 +1,7 @@
#ifndef YARP_STRING_H
#define YARP_STRING_H
#ifndef PRISM_STRING_H
#define PRISM_STRING_H
#include "yarp/defines.h"
#include "prism/defines.h"
#include <assert.h>
#include <stdbool.h>
@ -11,51 +11,51 @@
// This struct represents a string value.
typedef struct {
enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type;
enum { PM_STRING_SHARED, PM_STRING_OWNED, PM_STRING_CONSTANT, PM_STRING_MAPPED } type;
const uint8_t *source;
size_t length;
} yp_string_t;
} pm_string_t;
#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 })
#define PM_EMPTY_STRING ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
// Initialize a shared string that is based on initial input.
void yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end);
void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
// Initialize an owned string that is responsible for freeing allocated memory.
void yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length);
void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
// Initialize a constant string that doesn't own its memory source.
void yp_string_constant_init(yp_string_t *string, const char *source, size_t length);
void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
// Read the file indicated by the filepath parameter into source and load its
// contents and size into the given yp_string_t.
// The given yp_string_t should be freed using yp_string_free() when it is no longer used.
// contents and size into the given pm_string_t.
// The given pm_string_t should be freed using pm_string_free() when it is no longer used.
//
// We want to use demand paging as much as possible in order to avoid having to
// read the entire file into memory (which could be detrimental to performance
// for large files). This means that if we're on windows we'll use
// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
// `mmap`, and on other POSIX systems we'll use `read`.
YP_EXPORTED_FUNCTION bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
// Returns the memory size associated with the string.
size_t yp_string_memsize(const yp_string_t *string);
size_t pm_string_memsize(const pm_string_t *string);
// Ensure the string is owned. If it is not, then reinitialize it as owned and
// copy over the previous source.
void yp_string_ensure_owned(yp_string_t *string);
void pm_string_ensure_owned(pm_string_t *string);
// Returns the length associated with the string.
YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string);
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
// Returns the start pointer associated with the string.
YP_EXPORTED_FUNCTION const uint8_t * yp_string_source(const yp_string_t *string);
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
// Free the associated memory of the given string.
YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
// Returns the size of the yp_string_t struct. This is necessary to allocate the
// Returns the size of the pm_string_t struct. This is necessary to allocate the
// correct amount of memory in the FFI backend.
YP_EXPORTED_FUNCTION size_t yp_string_sizeof(void);
PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
#endif // YARP_STRING_H
#endif // PRISM_STRING_H

Просмотреть файл

@ -1,21 +1,21 @@
#include "yarp/util/yp_string_list.h"
#include "prism/util/pm_string_list.h"
// Initialize a yp_string_list_t with its default values.
// Initialize a pm_string_list_t with its default values.
void
yp_string_list_init(yp_string_list_t *string_list) {
string_list->strings = (yp_string_t *) malloc(sizeof(yp_string_t));
pm_string_list_init(pm_string_list_t *string_list) {
string_list->strings = (pm_string_t *) malloc(sizeof(pm_string_t));
string_list->length = 0;
string_list->capacity = 1;
}
// Append a yp_string_t to the given string list.
// Append a pm_string_t to the given string list.
void
yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string) {
if (string_list->length + 1 > string_list->capacity) {
yp_string_t *original_string = string_list->strings;
pm_string_t *original_string = string_list->strings;
string_list->capacity *= 2;
string_list->strings = (yp_string_t *) malloc(string_list->capacity * sizeof(yp_string_t));
memcpy(string_list->strings, original_string, (string_list->length) * sizeof(yp_string_t));
string_list->strings = (pm_string_t *) malloc(string_list->capacity * sizeof(pm_string_t));
memcpy(string_list->strings, original_string, (string_list->length) * sizeof(pm_string_t));
free(original_string);
}
@ -24,6 +24,6 @@ yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
// Free the memory associated with the string list.
void
yp_string_list_free(yp_string_list_t *string_list) {
pm_string_list_free(pm_string_list_t *string_list) {
free(string_list->strings);
}

Просмотреть файл

@ -1,25 +1,25 @@
#ifndef YARP_STRING_LIST_H
#define YARP_STRING_LIST_H
#ifndef PRISM_STRING_LIST_H
#define PRISM_STRING_LIST_H
#include "yarp/defines.h"
#include "yarp/util/yp_string.h"
#include "prism/defines.h"
#include "prism/util/pm_string.h"
#include <stddef.h>
#include <stdlib.h>
typedef struct {
yp_string_t *strings;
pm_string_t *strings;
size_t length;
size_t capacity;
} yp_string_list_t;
} pm_string_list_t;
// Initialize a yp_string_list_t with its default values.
YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list);
// Initialize a pm_string_list_t with its default values.
PRISM_EXPORTED_FUNCTION void pm_string_list_init(pm_string_list_t *string_list);
// Append a yp_string_t to the given string list.
void yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string);
// Append a pm_string_t to the given string list.
void pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string);
// Free the memory associated with the string list.
YP_EXPORTED_FUNCTION void yp_string_list_free(yp_string_list_t *string_list);
PRISM_EXPORTED_FUNCTION void pm_string_list_free(pm_string_list_t *string_list);
#endif

Просмотреть файл

@ -3,7 +3,7 @@
#include <stdint.h>
int
yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
size_t offset = 0;
int difference = 0;

Просмотреть файл

@ -1,8 +1,8 @@
#include "yarp/util/yp_strpbrk.h"
#include "prism/util/pm_strpbrk.h"
// This is the slow path that does care about the encoding.
static inline const uint8_t *
yp_strpbrk_multi_byte(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
size_t index = 0;
while (index < maximum) {
@ -23,7 +23,7 @@ yp_strpbrk_multi_byte(yp_parser_t *parser, const uint8_t *source, const uint8_t
// This is the fast path that does not care about the encoding.
static inline const uint8_t *
yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
size_t index = 0;
while (index < maximum) {
@ -39,9 +39,9 @@ yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
// Here we have rolled our own version of strpbrk. The standard library strpbrk
// has undefined behavior when the source string is not null-terminated. We want
// to support strings that are not null-terminated because yp_parse does not
// to support strings that are not null-terminated because pm_parse does not
// have the contract that the string is null-terminated. (This is desirable
// because it means the extension can call yp_parse with the result of a call to
// because it means the extension can call pm_parse with the result of a call to
// mmap).
//
// The standard library strpbrk also does not support passing a maximum length
@ -55,12 +55,12 @@ yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
// Shift-JIS, the backslash character can be a trailing byte. In that case we
// need to take a slower path and iterate one multi-byte character at a time.
const uint8_t *
yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
if (length <= 0) {
return NULL;
} else if (parser->encoding_changed && parser->encoding.multibyte) {
return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length);
} else {
return yp_strpbrk_single_byte(source, charset, (size_t) length);
return pm_strpbrk_single_byte(source, charset, (size_t) length);
}
}

Просмотреть файл

@ -1,17 +1,17 @@
#ifndef YP_STRPBRK_H
#define YP_STRPBRK_H
#ifndef PRISM_STRPBRK_H
#define PRISM_STRPBRK_H
#include "yarp/defines.h"
#include "yarp/parser.h"
#include "prism/defines.h"
#include "prism/parser.h"
#include <stddef.h>
#include <string.h>
// Here we have rolled our own version of strpbrk. The standard library strpbrk
// has undefined behavior when the source string is not null-terminated. We want
// to support strings that are not null-terminated because yp_parse does not
// to support strings that are not null-terminated because pm_parse does not
// have the contract that the string is null-terminated. (This is desirable
// because it means the extension can call yp_parse with the result of a call to
// because it means the extension can call pm_parse with the result of a call to
// mmap).
//
// The standard library strpbrk also does not support passing a maximum length
@ -24,6 +24,6 @@
// characters that are trailing bytes of multi-byte characters. For example, in
// Shift-JIS, the backslash character can be a trailing byte. In that case we
// need to take a slower path and iterate one multi-byte character at a time.
const uint8_t * yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
#endif

Просмотреть файл

@ -1,4 +1,4 @@
#define YP_VERSION_MAJOR 0
#define YP_VERSION_MINOR 12
#define YP_VERSION_PATCH 0
#define YP_VERSION "0.12.0"
#define PRISM_VERSION_MAJOR 0
#define PRISM_VERSION_MINOR 12
#define PRISM_VERSION_PATCH 0
#define PRISM_VERSION "0.12.0"

Просмотреть файл

@ -6,7 +6,7 @@ return if RUBY_ENGINE == "jruby" || RUBY_ENGINE == "truffleruby"
require_relative "test_helper"
module YARP
module Prism
class BOMTest < TestCase
def test_ident
assert_bom("foo")
@ -53,7 +53,7 @@ module YARP
def assert_bom(source)
bommed = "\xEF\xBB\xBF#{source}"
assert_equal YARP.lex_ripper(bommed), YARP.lex_compat(bommed).value
assert_equal Prism.lex_ripper(bommed), Prism.lex_compat(bommed).value
end
end
end

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class CommentsTest < TestCase
def test_comment_inline
source = "# comment"
@ -67,7 +67,7 @@ module YARP
end # Foo end
RUBY
result = YARP.parse(source)
result = Prism.parse(source)
result.attach_comments!
tree = result.value
class_node = tree.statements.body.first
@ -92,7 +92,7 @@ module YARP
end_column: end_column
}
result = YARP.parse(source)
result = Prism.parse(source)
assert result.errors.empty?, result.errors.map(&:message).join("\n")
assert_equal type, result.comments.first.type

Просмотреть файл

@ -2,9 +2,9 @@
require_relative "test_helper"
module YARP
module Prism
class CompilerTest < TestCase
class SExpressions < YARP::Compiler
class SExpressions < Prism::Compiler
def visit_arguments_node(node)
[:arguments, super]
end
@ -24,7 +24,7 @@ module YARP
def test_compiler
expected = [:program, [[[:call, [[:integer], [:arguments, [[:integer]]]]]]]]
assert_equal expected, YARP.parse("1 + 2").value.accept(SExpressions.new)
assert_equal expected, Prism.parse("1 + 2").value.accept(SExpressions.new)
end
end
end

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class DesugarCompilerTest < TestCase
def test_and_write
assert_desugars("(AndNode (ClassVariableReadNode) (ClassVariableWriteNode (CallNode)))", "@@foo &&= bar")
@ -72,14 +72,14 @@ module YARP
end
def assert_desugars(expected, source)
ast = YARP.parse(source).value.accept(DesugarCompiler.new)
ast = Prism.parse(source).value.accept(DesugarCompiler.new)
assert_equal expected, ast_inspect(ast.statements.body.last)
ast.accept(EnsureEveryNodeOnceInAST.new)
end
def assert_not_desugared(source, reason)
ast = YARP.parse(source).value
ast = Prism.parse(source).value
assert_equal_nodes(ast, ast.accept(DesugarCompiler.new))
end
end

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class DispatcherTest < TestCase
class TestListener
attr_reader :events_received
@ -29,7 +29,7 @@ module YARP
dispatcher = Dispatcher.new
dispatcher.register(listener, :on_call_node_enter, :on_call_node_leave, :on_integer_node_enter)
root = YARP.parse(<<~RUBY).value
root = Prism.parse(<<~RUBY).value
def foo
something(1, 2, 3)
end

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class EncodingTest < TestCase
%w[
ascii
@ -39,27 +39,27 @@ module YARP
CP1252
].each do |encoding|
define_method "test_encoding_#{encoding}" do
result = YARP.parse("# encoding: #{encoding}\nident")
result = Prism.parse("# encoding: #{encoding}\nident")
actual = result.value.statements.body.first.name.encoding
assert_equal Encoding.find(encoding), actual
end
end
def test_coding
result = YARP.parse("# coding: utf-8\nident")
result = Prism.parse("# coding: utf-8\nident")
actual = result.value.statements.body.first.name.encoding
assert_equal Encoding.find("utf-8"), actual
end
def test_coding_with_whitespace
result = YARP.parse("# coding \t \r \v : \t \v \r ascii-8bit \nident")
result = Prism.parse("# coding \t \r \v : \t \v \r ascii-8bit \nident")
actual = result.value.statements.body.first.name.encoding
assert_equal Encoding.find("ascii-8bit"), actual
end
def test_emacs_style
result = YARP.parse("# -*- coding: utf-8 -*-\nident")
result = Prism.parse("# -*- coding: utf-8 -*-\nident")
actual = result.value.statements.body.first.name.encoding
assert_equal Encoding.find("utf-8"), actual
end
@ -67,7 +67,7 @@ module YARP
# This test may be a little confusing. Basically when we use our strpbrk, it
# takes into account the encoding of the file.
def test_strpbrk_multibyte
result = YARP.parse(<<~RUBY)
result = Prism.parse(<<~RUBY)
# encoding: Shift_JIS
%w[\x81\x5c]
RUBY
@ -86,19 +86,19 @@ module YARP
utf-8-mac
utf-8-*
].each do |encoding|
result = YARP.parse("# coding: #{encoding}\nident")
result = Prism.parse("# coding: #{encoding}\nident")
actual = result.value.statements.body.first.name.encoding
assert_equal Encoding.find("utf-8"), actual
end
end
def test_first_lexed_token
encoding = YARP.lex("# encoding: ascii-8bit").value[0][0].value.encoding
encoding = Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding
assert_equal Encoding.find("ascii-8bit"), encoding
end
def test_slice_encoding
slice = YARP.parse("# encoding: Shift_JIS\n").value.slice
slice = Prism.parse("# encoding: Shift_JIS\n").value.slice
assert_equal (+"").force_encoding(Encoding::SHIFT_JIS), slice
assert_equal Encoding::SHIFT_JIS, slice.encoding
end

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class ErrorsTest < TestCase
include DSL
@ -1175,7 +1175,7 @@ module YARP
end
def test_invalid_message_name
result = YARP.parse("+.@foo,+=foo")
result = Prism.parse("+.@foo,+=foo")
assert_equal "", result.value.statements.body.first.write_name
end
@ -1354,7 +1354,7 @@ module YARP
# Ripper behaves differently on JRuby/TruffleRuby, so only check this on CRuby
assert_nil Ripper.sexp_raw(source) if compare_ripper
result = YARP.parse(source)
result = Prism.parse(source)
node = result.value.statements.body.last
assert_equal_nodes(expected, node, compare_location: false)
@ -1363,12 +1363,12 @@ module YARP
def assert_error_messages(source, errors, compare_ripper: RUBY_ENGINE == "ruby")
assert_nil Ripper.sexp_raw(source) if compare_ripper
result = YARP.parse(source)
result = Prism.parse(source)
assert_equal(errors, result.errors.map(&:message))
end
def expression(source)
YARP.parse(source).value.statements.body.last
Prism.parse(source).value.statements.body.last
end
end
end

Просмотреть файл

@ -31,7 +31,7 @@ A
j]
# ripper can't parse this successfully, though ruby runs it correctly
# TODO: yarp does not include the "\n" in "l\nl" in the AST like ruby does
# TODO: prism does not include the "\n" in "l\nl" in the AST like ruby does
pp <<-A, %W[l\
k
A
@ -43,8 +43,8 @@ m
A
n]
# ripper gets this one wrong in the same way that YARP does ...
# TODO: yarp does not include the "\n" in "p\np" in the AST like ruby does
# ripper gets this one wrong in the same way that prism does ...
# TODO: prism does not include the "\n" in "p\np" in the AST like ruby does
pp <<-A, %I[p\
o
A

Просмотреть файл

@ -2,11 +2,11 @@
require_relative "test_helper"
module YARP
module Prism
# These tests are simply to exercise snippets found by the fuzzer that caused invalid memory access.
class FuzzerTest < TestCase
def self.snippet(name, source)
define_method(:"test_fuzzer_#{name}") { YARP.dump(source) }
define_method(:"test_fuzzer_#{name}") { Prism.dump(source) }
end
snippet "incomplete global variable", "$"

Просмотреть файл

@ -2,13 +2,13 @@
require_relative "test_helper"
module YARP
module Prism
class HeredocDedentTest < TestCase
filepath = File.expand_path("fixtures/tilde_heredocs.txt", __dir__)
File.read(filepath).split(/(?=\n)\n(?=<)/).each_with_index do |heredoc, index|
define_method "test_heredoc_#{index}" do
node = YARP.parse(heredoc).value.statements.body.first
node = Prism.parse(heredoc).value.statements.body.first
if node.is_a? StringNode
actual = node.unescaped
else

Просмотреть файл

@ -4,9 +4,9 @@ require_relative "test_helper"
return if RUBY_PLATFORM !~ /linux/
module YARP
module Prism
#
# examine a yarp dll or static archive for expected external symbols.
# examine a prism dll or static archive for expected external symbols.
# these tests only work on a linux system right now.
#
class LibrarySymbolsTest < TestCase
@ -15,7 +15,7 @@ module YARP
@librubyparser_a = File.expand_path("../../build/librubyparser.a", __dir__)
@librubyparser_so = File.expand_path("../../build/librubyparser.so", __dir__)
@yarp_so = File.expand_path("../../lib/yarp/yarp.so", __dir__)
@prism_so = File.expand_path("../../lib/prism/prism.so", __dir__)
end
# objdump runner and helpers
@ -64,12 +64,12 @@ module YARP
assert_empty(names(visible_global_objdump_symbols(@librubyparser_a)))
end
def test_librubyparser_a_contains_hidden_yp_symbols
def test_librubyparser_a_contains_hidden_pm_symbols
omit("librubyparser.a is not built") unless File.exist?(@librubyparser_a)
names(hidden_global_objdump_symbols(@librubyparser_a)).tap do |symbols|
assert_includes(symbols, "yp_parse")
assert_includes(symbols, "yp_version")
assert_includes(symbols, "pm_parse")
assert_includes(symbols, "pm_version")
end
end
@ -80,23 +80,23 @@ module YARP
omit("librubyparser.so is not built") unless File.exist?(@librubyparser_so)
names(global_nm_symbols(@librubyparser_so)).tap do |symbols|
assert_includes(symbols, "yp_parse")
assert_includes(symbols, "yp_version")
assert_includes(symbols, "pm_parse")
assert_includes(symbols, "pm_version")
end
names(local_nm_symbols(@librubyparser_so)).tap do |symbols|
assert_includes(symbols, "yp_encoding_shift_jis_isupper_char")
assert_includes(symbols, "pm_encoding_shift_jis_isupper_char")
end
# TODO: someone who uses this library needs to finish this test
end
#
# shared object - yarp.so
# shared object - prism.so
#
def test_yarp_so_exports_only_the_C_extension_init_function
omit("yarp.so is not built") unless File.exist?(@yarp_so)
def test_prism_so_exports_only_the_C_extension_init_function
omit("prism.so is not built") unless File.exist?(@prism_so)
names(global_nm_symbols(@yarp_so)).tap do |symbols|
assert_equal(["Init_yarp"], symbols)
names(global_nm_symbols(@prism_so)).tap do |symbols|
assert_equal(["Init_prism"], symbols)
end
end
end

Просмотреть файл

@ -15,7 +15,7 @@ return if RUBY_PLATFORM =~ /i686/
require_relative "test_helper"
module YARP
module Prism
class LocalsTest < TestCase
invalid = []
todos = []
@ -93,7 +93,7 @@ module YARP
source = File.read(filepath)
expected = Debug.cruby_locals(source)
actual = Debug.yarp_locals(source)
actual = Debug.prism_locals(source)
assert_equal(expected, actual)
end

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class LocationTest < TestCase
def test_AliasGlobalVariableNode
assert_location(AliasGlobalVariableNode, "alias $foo $bar")
@ -839,7 +839,7 @@ module YARP
end
def test_all_tested
expected = YARP.constants.grep(/.Node$/).sort - %i[MissingNode ProgramNode]
expected = Prism.constants.grep(/.Node$/).sort - %i[MissingNode ProgramNode]
actual = LocationTest.instance_methods(false).grep(/.Node$/).map { |name| name[5..].to_sym }.sort
assert_equal expected, actual
end
@ -847,7 +847,7 @@ module YARP
private
def assert_location(kind, source, expected = 0...source.length)
result = YARP.parse(source)
result = Prism.parse(source)
assert_equal [], result.comments
assert_equal [], result.errors

Просмотреть файл

@ -2,9 +2,9 @@
require_relative "test_helper"
return if YARP::BACKEND == :FFI
return if Prism::BACKEND == :FFI
module YARP
module Prism
class MemsizeTest < TestCase
def test_memsize
result = Debug.memsize("2 + 3")

Просмотреть файл

@ -4,7 +4,7 @@ require_relative "test_helper"
return unless defined?(RubyVM::InstructionSequence)
module YARP
module Prism
class NewlineTest < TestCase
base = File.dirname(__dir__)
Dir["{lib,test}/**/*.rb", base: base].each do |relative|
@ -20,9 +20,9 @@ module YARP
source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
expected = rubyvm_lines(source)
result = YARP.parse_file(filepath)
result = Prism.parse_file(filepath)
assert_empty result.errors
actual = yarp_lines(result)
actual = prism_lines(result)
source.each_line.with_index(1) do |line, line_number|
# Lines like `while (foo = bar)` result in two line flags in the
@ -74,7 +74,7 @@ module YARP
lines.sort
end
def yarp_lines(result)
def prism_lines(result)
result.mark_newlines!
queue = [result.value]

Просмотреть файл

@ -2,13 +2,13 @@
require_relative "test_helper"
return if YARP::BACKEND == :FFI
return if Prism::BACKEND == :FFI
module YARP
module Prism
class ParseSerializeTest < TestCase
def test_parse_serialize
dumped = Debug.parse_serialize_file(__FILE__)
result = YARP.load(File.read(__FILE__), dumped)
result = Prism.load(File.read(__FILE__), dumped)
assert_kind_of ParseResult, result, "Expected the return value to be a ParseResult"
assert_equal __FILE__, find_file_node(result)&.filepath, "Expected the filepath to be set correctly"
@ -19,7 +19,7 @@ module YARP
metadata = [filepath.bytesize, filepath.b, 1, 1, 1, "foo".b].pack("LA*LLLA*")
dumped = Debug.parse_serialize_file_metadata(filepath, metadata)
result = YARP.load(File.read(__FILE__), dumped)
result = Prism.load(File.read(__FILE__), dumped)
assert_kind_of ParseResult, result, "Expected the return value to be a ParseResult"
end

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class ParseTest < TestCase
# When we pretty-print the trees to compare against the snapshots, we want to
# be certain that we print with the same external encoding. This is because
@ -20,26 +20,26 @@ module YARP
end
def test_empty_string
result = YARP.parse("")
result = Prism.parse("")
assert_equal [], result.value.statements.body
end
def test_parse_takes_file_path
filepath = "filepath.rb"
result = YARP.parse("def foo; __FILE__; end", filepath)
result = Prism.parse("def foo; __FILE__; end", filepath)
assert_equal filepath, find_source_file_node(result.value).filepath
end
def test_parse_lex
node, tokens = YARP.parse_lex("def foo; end").value
node, tokens = Prism.parse_lex("def foo; end").value
assert_kind_of ProgramNode, node
assert_equal 5, tokens.length
end
def test_parse_lex_file
node, tokens = YARP.parse_lex_file(__FILE__).value
node, tokens = Prism.parse_lex_file(__FILE__).value
assert_kind_of ProgramNode, node
refute_empty tokens
@ -85,23 +85,12 @@ module YARP
# and explicitly set the external encoding to UTF-8 to override the binmode default.
source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
if ripper_should_parse
src = source
case relative
when /break|next|redo|if|unless|rescue|control|keywords|retry/
# Uncaught syntax errors: Invalid break, Invalid next
src = "->do\nrescue\n#{src}\nend"
ripper_should_match = false
end
# Make sure that it can be correctly parsed by Ripper. If it can't, then we have a fixture
# that is invalid Ruby.
refute_nil(Ripper.sexp_raw(src), "Ripper failed to parse")
end
# Make sure that it can be correctly parsed by Ripper. If it can't, then we have a fixture
# that is invalid Ruby.
refute_nil(Ripper.sexp_raw(source), "Ripper failed to parse") if ripper_should_parse
# Next, assert that there were no errors during parsing.
result = YARP.parse(source, relative)
result = Prism.parse(source, relative)
assert_empty result.errors
# Next, pretty print the source.
@ -128,7 +117,7 @@ module YARP
# Next, assert that the value can be serialized and deserialized without
# changing the shape of the tree.
assert_equal_nodes(result.value, YARP.load(source, YARP.dump(source, relative)).value)
assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, relative)).value)
# Next, check that the location ranges of each node in the tree are a
# superset of their respective child nodes.
@ -142,13 +131,13 @@ module YARP
if ripper_should_parse && ripper_should_match
# Finally, assert that we can lex the source and get the same tokens as
# Ripper.
lex_result = YARP.lex_compat(source)
lex_result = Prism.lex_compat(source)
assert_equal [], lex_result.errors
tokens = lex_result.value
begin
YARP.lex_ripper(source).zip(tokens).each do |(ripper, yarp)|
assert_equal ripper, yarp
Prism.lex_ripper(source).zip(tokens).each do |(ripper, prism)|
assert_equal ripper, prism
end
rescue SyntaxError
raise ArgumentError, "Test file has invalid syntax #{filepath}"
@ -171,10 +160,10 @@ module YARP
file_contents.split(/(?<=\S)\n\n(?=\S)/).each do |snippet|
snippet = snippet.rstrip
result = YARP.parse(snippet, relative)
result = Prism.parse(snippet, relative)
assert_empty result.errors
assert_equal_nodes(result.value, YARP.load(snippet, YARP.dump(snippet, relative)).value)
assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, relative)).value)
end
end
end

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class PatternTest < TestCase
def test_invalid_syntax
assert_raise(Pattern::CompilationError) { scan("", "<>") }
@ -69,7 +69,7 @@ module YARP
end
def test_constant_path
results = scan("Foo + Bar + Baz", "YARP::ConstantReadNode")
results = scan("Foo + Bar + Baz", "Prism::ConstantReadNode")
assert_equal 3, results.length
end
@ -84,7 +84,7 @@ module YARP
results = scan("Foo + Bar + Baz", "{ name: /^[[:punct:]]$/ }")
assert_equal 2, results.length
assert_equal ["YARP::CallNode"], results.map { |node| node.class.name }.uniq
assert_equal ["Prism::CallNode"], results.map { |node| node.class.name }.uniq
end
def test_nil
@ -126,7 +126,7 @@ module YARP
private
def scan(source, query)
YARP::Pattern.new(query).scan(YARP.parse(source).value).to_a
Prism::Pattern.new(query).scan(Prism.parse(source).value).to_a
end
end
end

Просмотреть файл

@ -2,9 +2,9 @@
require_relative "test_helper"
return if YARP::BACKEND == :FFI
return if Prism::BACKEND == :FFI
module YARP
module Prism
class RegexpTest < TestCase
##############################################################################
# These tests test the actual use case of extracting named capture groups
@ -236,7 +236,7 @@ module YARP
def options(flags)
options =
["/foo/#{flags}", "/foo\#{1}/#{flags}"].map do |source|
YARP.parse(source).value.statements.body.first.options
Prism.parse(source).value.statements.body.first.options
end
# Check that we get the same set of options from both regular expressions

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class RipperCompatTest < TestCase
def test_1_plus_2
assert_equivalent("1 + 2")

Просмотреть файл

@ -2,19 +2,19 @@
require_relative "test_helper"
module YARP
module Prism
class RubyAPITest < TestCase
def test_ruby_api
filepath = __FILE__
source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
assert_equal YARP.lex(source, filepath).value, YARP.lex_file(filepath).value
assert_equal YARP.dump(source, filepath), YARP.dump_file(filepath)
assert_equal Prism.lex(source, filepath).value, Prism.lex_file(filepath).value
assert_equal Prism.dump(source, filepath), Prism.dump_file(filepath)
serialized = YARP.dump(source, filepath)
ast1 = YARP.load(source, serialized).value
ast2 = YARP.parse(source, filepath).value
ast3 = YARP.parse_file(filepath).value
serialized = Prism.dump(source, filepath)
ast1 = Prism.load(source, serialized).value
ast2 = Prism.parse(source, filepath).value
ast3 = Prism.parse_file(filepath).value
assert_equal_nodes ast1, ast2
assert_equal_nodes ast2, ast3
@ -58,7 +58,7 @@ module YARP
private
def parse_expression(source)
YARP.parse(source).value.statements.body.first
Prism.parse(source).value.statements.body.first
end
end
end

Просмотреть файл

@ -1,12 +1,12 @@
# frozen_string_literal: true
require "yarp"
require "prism"
require "ripper"
require "pp"
require "test/unit"
require "tempfile"
puts "Using YARP backend: #{YARP::BACKEND}" if ENV["YARP_FFI_BACKEND"]
puts "Using prism backend: #{Prism::BACKEND}" if ENV["PRISM_FFI_BACKEND"]
# It is useful to have a diff even if the strings to compare are big
# However, ruby/ruby does not have a version of Test::Unit with access to
@ -15,7 +15,7 @@ if defined?(Test::Unit::Assertions::AssertionMessage)
Test::Unit::Assertions::AssertionMessage.max_diff_target_string_size = 5000
end
module YARP
module Prism
class TestCase < ::Test::Unit::TestCase
private

Просмотреть файл

@ -2,9 +2,9 @@
require_relative "test_helper"
return if YARP::BACKEND == :FFI
return if Prism::BACKEND == :FFI
module YARP
module Prism
class UnescapeNoneTest < TestCase
def test_backslash
assert_unescape_none("\\")

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
module YARP
module Prism
class VersionTest < TestCase
def test_version_is_set
refute_nil VERSION