[ruby/yarp] Introduce owned constants

Before this commit, constants in the constant pool were assumed to
be slices of the source string. This works in _almost_ all cases.

There are times, however, when a string needs to be synthesized.
This can occur when passing in locals that need to be scoped through
eval, or when generating method names like `foo=`.

After this commit, there is a single-bit `owned` boolean on
constants in the pool that indicates whether or not it is a slice
of the source string. If it is not, it is assumed to be allocated
memory that should be freed by the constant pool when the constant
pool is freed.

When serializing, the most significant bit in the location of the
contents of the constant indicates whether or not it is owned.
When it is, the entry holds 4 bytes for the buffer offset and 4 bytes
for the length, rather than 4 bytes for the source offset and 4 bytes
for the length. The contents of the owned constants are embedded
into the buffer after the constant pool itself.

https://github.com/ruby/yarp/commit/461c047365
This commit is contained in:
Kevin Newton 2023-09-06 12:43:36 -04:00 коммит произвёл git
Родитель 0adca625ee
Коммит 194584f202
7 изменённых файлов: 142 добавлений и 26 удалений

Просмотреть файл

@ -14,6 +14,16 @@ module YARP
assert_equal __FILE__, find_file_node(result)&.filepath, "Expected the filepath to be set correctly"
end
# Round-trips this file through Debug.parse_serialize_file_metadata using
# packed metadata, then checks that YARP.load returns a ParseResult.
def test_parse_serialize_with_locals
  path = __FILE__
  fields = [path.bytesize, path.b, 1, 1, 1, "foo".b]
  metadata = fields.pack("LA*LLLA*")

  serialized = Debug.parse_serialize_file_metadata(path, metadata)
  source = File.read(__FILE__)
  result = YARP.load(source, serialized)

  assert_kind_of ParseResult, result, "Expected the return value to be a ParseResult"
end
private
def find_file_node(result)

Просмотреть файл

@ -144,30 +144,41 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
// <%= field.name %>
<%- case field -%>
<%- when YARP::NodeField, YARP::OptionalNodeField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_pop(value_stack);
<%- when YARP::NodeListField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
rb_ary_push(argv[<%= index %>], rb_ary_pop(value_stack));
}
<%- when YARP::StringField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = yp_string_new(&cast-><%= field.name %>, encoding);
<%- when YARP::ConstantField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
assert(cast-><%= field.name %> != 0);
argv[<%= index %>] = rb_id2sym(constants[cast-><%= field.name %> - 1]);
<%- when YARP::OptionalConstantField -%>
argv[<%= index %>] = cast-><%= field.name %> == 0 ? Qnil : rb_id2sym(constants[cast-><%= field.name %> - 1]);
<%- when YARP::ConstantListField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
assert(cast-><%= field.name %>.ids[index] != 0);
rb_ary_push(argv[<%= index %>], rb_id2sym(constants[cast-><%= field.name %>.ids[index] - 1]));
}
<%- when YARP::LocationField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = yp_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
<%- when YARP::OptionalLocationField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : yp_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
<%- when YARP::UInt32Field -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = ULONG2NUM(cast-><%= field.name %>);
<%- when YARP::FlagsField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = ULONG2NUM(node->flags >> <%= YARP::COMMON_FLAGS %>);
<%- else -%>
<%- raise -%>

Просмотреть файл

@ -163,11 +163,16 @@ module YARP
unless constant
offset = constant_pool_offset + index * 8
start = serialized.unpack1("L", offset: offset)
length = serialized.unpack1("L", offset: offset + 4)
constant = input.byteslice(start, length).to_sym
constant =
if start.nobits?(1 << 31)
input.byteslice(start, length).to_sym
else
serialized.byteslice(start & ((1 << 31) - 1), length).to_sym
end
constant_pool[index] = constant
end

Просмотреть файл

@ -206,12 +206,31 @@ yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer)
// If we find a constant at this index, serialize it at the correct
// index in the buffer.
if (constant->id != 0) {
size_t buffer_offset = offset + ((constant->id - 1) * 8);
size_t buffer_offset = offset + ((((size_t) constant->id) - 1) * 8);
uint32_t source_offset = yp_ptrdifft_to_u32(constant->start - parser->start);
if (constant->owned) {
// Since this is an owned constant, we are going to write its
// contents into the buffer after the constant pool. So
// effectively in place of the source offset, we have a buffer
// offset. We will add a leading 1 to indicate that this is a
// buffer offset.
uint32_t content_offset = yp_sizet_to_u32(buffer->length);
uint32_t owned_mask = (uint32_t) (1 << 31);
assert(content_offset < owned_mask);
content_offset |= owned_mask;
memcpy(buffer->value + buffer_offset, &content_offset, 4);
yp_buffer_append_bytes(buffer, constant->start, constant->length);
} else {
// Since this is a shared constant, we are going to write its
// source offset directly into the buffer.
uint32_t source_offset = yp_ptrdifft_to_u32(constant->start - parser->start);
memcpy(buffer->value + buffer_offset, &source_offset, 4);
}
// Now we can write the length of the constant into the buffer.
uint32_t constant_length = yp_sizet_to_u32(constant->length);
memcpy(buffer->value + buffer_offset, &source_offset, 4);
memcpy(buffer->value + buffer_offset + 4, &constant_length, 4);
}
}

Просмотреть файл

@ -106,12 +106,11 @@ yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity) {
return true;
}
// Insert a constant into a constant pool. Returns the id of the constant, or 0
// if any potential calls to resize fail.
yp_constant_id_t
// Insert a constant into a constant pool and return its index in the pool.
static size_t
yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
if (pool->size >= (pool->capacity / 4 * 3)) {
if (!yp_constant_pool_resize(pool)) return 0;
if (!yp_constant_pool_resize(pool)) return pool->capacity;
}
size_t hash = yp_constant_pool_hash(start, length);
@ -123,25 +122,59 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t l
// same as the content we are trying to insert. If it is, then we can
// return the id of the existing constant.
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
return pool->constants[index].id;
return index;
}
index = (index + 1) % pool->capacity;
}
yp_constant_id_t id = (yp_constant_id_t)++pool->size;
pool->size++;
assert(pool->size < ((size_t) (1 << 31)));
pool->constants[index] = (yp_constant_t) {
.id = id,
.id = (unsigned int) (pool->size & 0x7FFFFFFF),
.start = start,
.length = length,
.hash = hash
};
return id;
return index;
}
// Insert a constant that is a slice of a source string into the pool. Returns
// the id of the constant, or 0 when the underlying insert signals failure by
// handing back an index equal to the pool's capacity.
yp_constant_id_t
yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
    size_t slot = yp_constant_pool_insert(pool, start, length);

    // An index equal to the capacity can never name a real slot, so it is used
    // as the failure sentinel.
    if (slot == pool->capacity) {
        return 0;
    }

    return (yp_constant_id_t) pool->constants[slot].id;
}
// Insert a constant into a constant pool from memory that is handed over to
// the constant pool. Returns the id of the constant, or 0 if any potential
// calls to resize fail.
//
// The pool takes responsibility for `start` on every path, so the caller must
// not use or free it after this call:
//   - if a new entry is created, the entry keeps `start` and is marked owned;
//   - if an equal constant is already in the pool, `start` is released and the
//     existing entry is returned unchanged;
//   - if the insert fails, `start` is released and 0 is returned.
yp_constant_id_t
yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
    size_t index = yp_constant_pool_insert(pool, start, length);

    if (index == pool->capacity) {
        // The insert failed, so the buffer was never stored anywhere. Release
        // it here, otherwise nothing would ever free it.
        free((void *) start);
        return 0;
    }

    yp_constant_t *constant = &pool->constants[index];

    if (constant->start != start) {
        // An equal constant was already present, so the insert returned the
        // existing entry without storing `start`. Drop the duplicate buffer
        // and leave the existing ownership flag alone: flagging an entry that
        // points into the source string as owned would make
        // yp_constant_pool_free call free() on memory the pool never
        // allocated.
        free((void *) start);
    } else {
        // The entry holds our buffer, so the pool must free it later.
        constant->owned = true;
    }

    return (yp_constant_id_t) constant->id;
}
// Release everything held by a constant pool: first the contents of every
// owned constant, then the table of constants itself.
void
yp_constant_pool_free(yp_constant_pool_t *pool) {
    for (uint32_t slot = 0; slot < pool->capacity; slot++) {
        yp_constant_t *entry = &pool->constants[slot];

        // A slot with an id of 0 has no content, and an entry that is not
        // owned points at memory that is not the pool's to free.
        if (entry->id == 0 || !entry->owned) continue;

        free((void *) entry->start);
    }

    free(pool->constants);
}

Просмотреть файл

@ -8,6 +8,7 @@
#include "yarp/defines.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
@ -39,7 +40,8 @@ size_t yp_constant_id_list_memsize(yp_constant_id_list_t *list);
void yp_constant_id_list_free(yp_constant_id_list_t *list);
typedef struct {
yp_constant_id_t id;
unsigned int id: 31;
bool owned: 1;
const uint8_t *start;
size_t length;
size_t hash;
@ -57,9 +59,14 @@ typedef struct {
// Initialize a new constant pool with a given capacity.
bool yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity);
// Insert a constant into a constant pool. Returns the id of the constant, or 0
// if any potential calls to resize fail.
yp_constant_id_t yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
// Insert a constant into a constant pool that is a slice of a source string.
// Returns the id of the constant, or 0 if any potential calls to resize fail.
yp_constant_id_t yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
// Insert a constant into a constant pool from memory that is now owned by the
// constant pool. Returns the id of the constant, or 0 if any potential calls to
// resize fail.
yp_constant_id_t yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
// Free the memory associated with a constant pool.
void yp_constant_pool_free(yp_constant_pool_t *pool);

Просмотреть файл

@ -428,7 +428,13 @@ debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * call
// Retrieve the constant pool id for the given location.
static inline yp_constant_id_t
yp_parser_constant_id_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
return yp_constant_pool_insert(&parser->constant_pool, start, (size_t) (end - start));
return yp_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
}
// Retrieve the constant pool id for a string whose memory is handed to the
// parser's constant pool.
static inline yp_constant_id_t
yp_parser_constant_id_owned(yp_parser_t *parser, const uint8_t *start, size_t length) {
    yp_constant_pool_t *pool = &parser->constant_pool;
    yp_constant_id_t constant_id = yp_constant_pool_insert_owned(pool, start, length);
    return constant_id;
}
// Retrieve the constant pool id for the given token.
@ -4610,15 +4616,19 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
return -1;
}
// Record a constant id in the local table of the current scope, skipping ids
// that the table already contains.
static inline void
yp_parser_local_add(yp_parser_t *parser, yp_constant_id_t constant_id) {
    yp_constant_id_list_t *locals = &parser->current_scope->locals;

    if (yp_constant_id_list_includes(locals, constant_id)) return;
    yp_constant_id_list_append(locals, constant_id);
}
// Add a local variable from a location to the current scope. Returns the
// constant id obtained for the bytes between start and end; callers receive
// whatever yp_parser_constant_id_location produced, including 0.
static yp_constant_id_t
yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
// NOTE(review): the inline includes/append check below does the same work as
// yp_parser_local_add, but without the constant_id != 0 guard. It looks like
// the pre-change lines of this diff hunk sitting next to their replacement;
// confirm which of the two forms is meant to remain.
if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
yp_constant_id_list_append(&parser->current_scope->locals, constant_id);
}
if (constant_id != 0) yp_parser_local_add(parser, constant_id);
return constant_id;
}
@ -4628,6 +4638,13 @@ yp_parser_local_add_token(yp_parser_t *parser, yp_token_t *token) {
yp_parser_local_add_location(parser, token->start, token->end);
}
// Register a local variable in the current scope from a string whose memory is
// handed to the constant pool. Nothing is added to the scope when the pool
// hands back an id of 0.
static inline void
yp_parser_local_add_owned(yp_parser_t *parser, const uint8_t *start, size_t length) {
    yp_constant_id_t id = yp_parser_constant_id_owned(parser, start, length);

    if (id == 0) {
        return;
    }

    yp_parser_local_add(parser, id);
}
// Add a parameter name to the current scope and check whether the name of the
// parameter is unique or not.
static void
@ -4644,7 +4661,9 @@ yp_parser_parameter_name_check(yp_parser_t *parser, yp_token_t *name) {
}
}
// Pop the current scope off the scope stack.
// Pop the current scope off the scope stack. Note that we specifically do not
// free the associated constant list because we assume that we have already
// transferred ownership of the list to the AST somewhere.
static void
yp_parser_scope_pop(yp_parser_t *parser) {
yp_scope_t *scope = parser->current_scope;
@ -13757,7 +13776,10 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
uint32_t local_size = yp_metadata_read_u32(metadata);
metadata += 4;
yp_parser_local_add_location(parser, (const uint8_t *) metadata, (const uint8_t *) (metadata + local_size));
uint8_t *constant = malloc(local_size);
memcpy(constant, metadata, local_size);
yp_parser_local_add_owned(parser, constant, (size_t) local_size);
metadata += local_size;
}
}
@ -13896,6 +13918,15 @@ yp_parser_free(yp_parser_t *parser) {
yp_constant_pool_free(&parser->constant_pool);
yp_newline_list_free(&parser->newline_list);
while (parser->current_scope != NULL) {
// Normally, popping the scope doesn't free the locals since it is
// assumed that ownership has transferred to the AST. However if we have
// scopes while we're freeing the parser, it's likely they came from
// eval scopes and we need to free them explicitly here.
yp_constant_id_list_free(&parser->current_scope->locals);
yp_parser_scope_pop(parser);
}
while (parser->lex_modes.index >= YP_LEX_STACK_SIZE) {
lex_mode_pop(parser);
}