зеркало из https://github.com/github/ruby.git
Mark JIT code as writeable / executable depending on the situation
Some platforms don't want memory to be marked as writeable and executable at the same time. When we write to the code block, we calculate the OS page that the buffer position maps to. Then we call `mprotect` to allow writes on that particular page. As an optimization, we cache the "last written" aligned page, which allows us to amortize the cost of the `mprotect` call. In other words, sequential writes to the same page will only call `mprotect` on the page once. When we're done writing, we call `mprotect` on the entire JIT buffer to make it executable again. This means we don't need to keep track of which pages were marked as writeable; we let the OS take care of that. Co-authored-by: John Hawthorn <john@hawthorn.email>
This commit is contained in:
Родитель
94ee88b38c
Коммит
157095b3a4
|
@ -401,7 +401,7 @@ void run_runtime_tests(void)
|
|||
int (*function)(void);
|
||||
function = (int (*)(void))mem_block;
|
||||
|
||||
#define TEST(BODY) cb_set_pos(cb, 0); BODY ret(cb); assert_equal(7, function());
|
||||
#define TEST(BODY) cb_set_pos(cb, 0); BODY ret(cb); cb_mark_all_executable(cb); assert_equal(7, function());
|
||||
|
||||
// add
|
||||
TEST({ mov(cb, RAX, imm_opnd(0)); add(cb, RAX, imm_opnd(7)); })
|
||||
|
|
48
yjit_asm.c
48
yjit_asm.c
|
@ -163,7 +163,7 @@ static uint8_t *alloc_exec_mem(uint32_t mem_size)
|
|||
mem_block = (uint8_t*)mmap(
|
||||
(void*)req_addr,
|
||||
mem_size,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||
PROT_READ | PROT_EXEC,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
|
||||
-1,
|
||||
0
|
||||
|
@ -184,7 +184,7 @@ static uint8_t *alloc_exec_mem(uint32_t mem_size)
|
|||
mem_block = (uint8_t*)mmap(
|
||||
(void*)alloc_exec_mem,
|
||||
mem_size,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||
PROT_READ | PROT_EXEC,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS,
|
||||
-1,
|
||||
0
|
||||
|
@ -197,7 +197,7 @@ static uint8_t *alloc_exec_mem(uint32_t mem_size)
|
|||
mem_block = (uint8_t*)mmap(
|
||||
NULL,
|
||||
mem_size,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||
PROT_READ | PROT_EXEC,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS,
|
||||
-1,
|
||||
0
|
||||
|
@ -210,9 +210,17 @@ static uint8_t *alloc_exec_mem(uint32_t mem_size)
|
|||
exit(-1);
|
||||
}
|
||||
|
||||
codeblock_t block;
|
||||
block.current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
|
||||
block.mem_block = mem_block;
|
||||
block.mem_size = mem_size;
|
||||
|
||||
codeblock_t * cb = █
|
||||
// Fill the executable memory with INT3 (0xCC) so that
|
||||
// executing uninitialized memory will fault
|
||||
cb_mark_all_writeable(cb);
|
||||
memset(mem_block, 0xCC, mem_size);
|
||||
cb_mark_all_executable(cb);
|
||||
|
||||
return mem_block;
|
||||
#else
|
||||
|
@ -230,6 +238,7 @@ void cb_init(codeblock_t *cb, uint8_t *mem_block, uint32_t mem_size)
|
|||
cb->write_pos = 0;
|
||||
cb->num_labels = 0;
|
||||
cb->num_refs = 0;
|
||||
cb->current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
|
||||
}
|
||||
|
||||
// Align the current write position to a multiple of bytes
|
||||
|
@ -277,6 +286,7 @@ void cb_write_byte(codeblock_t *cb, uint8_t byte)
|
|||
{
|
||||
assert (cb->mem_block);
|
||||
assert (cb->write_pos + 1 <= cb->mem_size);
|
||||
cb_mark_position_writeable(cb, cb->write_pos);
|
||||
cb->mem_block[cb->write_pos++] = byte;
|
||||
}
|
||||
|
||||
|
@ -1771,3 +1781,35 @@ void cb_write_lock_prefix(codeblock_t *cb)
|
|||
{
|
||||
cb_write_byte(cb, 0xF0);
|
||||
}
|
||||
|
||||
// Switch the protection of the whole code block buffer to read + write
// (no execute permission is requested).
// On mprotect failure, print a diagnostic to stderr and abort the process.
void cb_mark_all_writeable(codeblock_t * cb)
{
    const int status = mprotect(cb->mem_block, cb->mem_size, PROT_READ | PROT_WRITE);
    if (status == 0) {
        return;
    }

    fprintf(stderr, "Couldn't make JIT page (%p) writeable, errno: %s", (void *)cb->mem_block, strerror(errno));
    abort();
}
|
||||
|
||||
// Make the OS page that contains write_pos (an offset into the code block
// buffer) readable and writeable.
// The offset of the page unlocked most recently is remembered in
// cb->current_aligned_write_pos, so repeated calls that land on the same page
// skip the mprotect call.
// On mprotect failure, print a diagnostic to stderr and abort the process.
void cb_mark_position_writeable(codeblock_t * cb, uint32_t write_pos)
{
    const uint32_t page_bytes = (uint32_t)sysconf(_SC_PAGESIZE);

    // Round the offset down to the start of the page it falls in
    const uint32_t page_start = write_pos - (write_pos % page_bytes);

    // Same page as the previous call: nothing to do
    if (cb->current_aligned_write_pos == page_start) {
        return;
    }
    cb->current_aligned_write_pos = page_start;

    void *page_addr = (void *)(cb->mem_block + page_start);
    if (mprotect(page_addr, page_bytes, PROT_READ | PROT_WRITE) != 0) {
        fprintf(stderr, "Couldn't make JIT page (%p) writeable, errno: %s", page_addr, strerror(errno));
        abort();
    }
}
|
||||
|
||||
// Switch the protection of the whole code block buffer to read + execute
// (no write permission is requested).
// On mprotect failure, print a diagnostic to stderr and abort the process.
void cb_mark_all_executable(codeblock_t * cb)
{
    // Drop the cached page offset so that it no longer matches any page
    // offset computed by cb_mark_position_writeable
    cb->current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;

    const int status = mprotect(cb->mem_block, cb->mem_size, PROT_READ | PROT_EXEC);
    if (status != 0) {
        fprintf(stderr, "Couldn't make JIT page (%p) executable, errno: %s", (void *)cb->mem_block, strerror(errno));
        abort();
    }
}
|
||||
|
||||
|
|
|
@ -55,8 +55,14 @@ typedef struct CodeBlock
|
|||
// Flag to enable or disable comments
|
||||
bool has_asm;
|
||||
|
||||
// Keep track of the current aligned write position.
|
||||
// Used for changing protection when writing to the JIT buffer
|
||||
uint32_t current_aligned_write_pos;
|
||||
} codeblock_t;
|
||||
|
||||
// 1 is not aligned so this won't match any pages
|
||||
#define ALIGNED_WRITE_POSITION_NONE 1
|
||||
|
||||
enum OpndType
|
||||
{
|
||||
OPND_NONE,
|
||||
|
@ -261,6 +267,9 @@ static inline uint32_t cb_new_label(codeblock_t *cb, const char *name);
|
|||
static inline void cb_write_label(codeblock_t *cb, uint32_t label_idx);
|
||||
static inline void cb_label_ref(codeblock_t *cb, uint32_t label_idx);
|
||||
static inline void cb_link_labels(codeblock_t *cb);
|
||||
static inline void cb_mark_all_writeable(codeblock_t *cb);
|
||||
static inline void cb_mark_position_writeable(codeblock_t *cb, uint32_t write_pos);
|
||||
static inline void cb_mark_all_executable(codeblock_t *cb);
|
||||
|
||||
// Encode individual instructions into a code block
|
||||
static inline void add(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
|
||||
|
|
|
@ -4876,6 +4876,8 @@ rb_yjit_tracing_invalidate_all(void)
|
|||
RUBY_ASSERT_ALWAYS(yjit_codepage_frozen_bytes <= old_pos && "frozen bytes should increase monotonically");
|
||||
yjit_codepage_frozen_bytes = old_pos;
|
||||
|
||||
cb_mark_all_executable(ocb);
|
||||
cb_mark_all_executable(cb);
|
||||
RB_VM_LOCK_LEAVE();
|
||||
}
|
||||
|
||||
|
@ -4957,6 +4959,7 @@ yjit_init_codegen(void)
|
|||
|
||||
// Generate full exit code for C func
|
||||
gen_full_cfunc_return();
|
||||
cb_mark_all_executable(cb);
|
||||
|
||||
// Map YARV opcodes to the corresponding codegen functions
|
||||
yjit_reg_op(BIN(nop), gen_nop);
|
||||
|
|
13
yjit_core.c
13
yjit_core.c
|
@ -833,12 +833,16 @@ gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_context_t
|
|||
// The entry context makes no assumptions about types
|
||||
blockid_t blockid = { iseq, insn_idx };
|
||||
|
||||
rb_vm_barrier();
|
||||
// Write the interpreter entry prologue. Might be NULL when out of memory.
|
||||
uint8_t *code_ptr = yjit_entry_prologue(cb, iseq);
|
||||
|
||||
// Try to generate code for the entry block
|
||||
block_t *block = gen_block_version(blockid, &DEFAULT_CTX, ec);
|
||||
|
||||
cb_mark_all_executable(ocb);
|
||||
cb_mark_all_executable(cb);
|
||||
|
||||
// If we couldn't generate any code
|
||||
if (!block || block->end_idx == insn_idx) {
|
||||
return NULL;
|
||||
|
@ -872,6 +876,8 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex
|
|||
dst_addr = branch->dst_addrs[target_idx];
|
||||
}
|
||||
else {
|
||||
rb_vm_barrier();
|
||||
|
||||
// :stub-sp-flush:
|
||||
// Generated code do stack operations without modifying cfp->sp, while the
|
||||
// cfp->sp tells the GC what values on the stack to root. Generated code
|
||||
|
@ -952,6 +958,9 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex
|
|||
// frame. We do that in code_for_exit_from_stub.
|
||||
dst_addr = code_for_exit_from_stub;
|
||||
}
|
||||
|
||||
cb_mark_all_executable(ocb);
|
||||
cb_mark_all_executable(cb);
|
||||
}
|
||||
|
||||
const ptrdiff_t new_branch_size = branch_code_size(branch);
|
||||
|
@ -1201,6 +1210,7 @@ static void
|
|||
invalidate_block_version(block_t *block)
|
||||
{
|
||||
ASSERT_vm_locking();
|
||||
|
||||
// TODO: want to assert that all other ractors are stopped here. Can't patch
|
||||
// machine code that some other thread is running.
|
||||
|
||||
|
@ -1324,6 +1334,9 @@ invalidate_block_version(block_t *block)
|
|||
yjit_runtime_counters.invalidation_count++;
|
||||
#endif
|
||||
|
||||
cb_mark_all_executable(ocb);
|
||||
cb_mark_all_executable(cb);
|
||||
|
||||
// fprintf(stderr, "invalidation done\n");
|
||||
}
|
||||
|
||||
|
|
11
yjit_iface.c
11
yjit_iface.c
|
@ -478,7 +478,7 @@ rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec)
|
|||
#if (OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE) && JIT_ENABLED
|
||||
bool success = true;
|
||||
RB_VM_LOCK_ENTER();
|
||||
// TODO: I think we need to stop all other ractors here
|
||||
rb_vm_barrier();
|
||||
|
||||
// Compile a block version starting at the first instruction
|
||||
uint8_t *code_ptr = gen_entry_point(iseq, 0, ec);
|
||||
|
@ -914,6 +914,8 @@ rb_yjit_iseq_mark(const struct rb_iseq_constant_body *body)
|
|||
void
|
||||
rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body)
|
||||
{
|
||||
rb_vm_barrier();
|
||||
|
||||
rb_darray_for(body->yjit_blocks, version_array_idx) {
|
||||
rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
|
||||
|
||||
|
@ -947,6 +949,11 @@ rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body)
|
|||
VALUE possibly_moved = rb_gc_location(object);
|
||||
// Only write when the VALUE moves, to be CoW friendly.
|
||||
if (possibly_moved != object) {
|
||||
// Possibly unlock the page we need to update
|
||||
cb_mark_position_writeable(cb, offset_to_value);
|
||||
|
||||
// Object could cross a page boundary, so unlock there as well
|
||||
cb_mark_position_writeable(cb, offset_to_value + SIZEOF_VALUE - 1);
|
||||
memcpy(value_address, &possibly_moved, SIZEOF_VALUE);
|
||||
}
|
||||
}
|
||||
|
@ -955,6 +962,8 @@ rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body)
|
|||
//block->code_page = rb_gc_location(block->code_page);
|
||||
}
|
||||
}
|
||||
cb_mark_all_executable(cb);
|
||||
cb_mark_all_executable(ocb);
|
||||
}
|
||||
|
||||
// Free the yjit resources associated with an iseq
|
||||
|
|
Загрузка…
Ссылка в новой задаче