зеркало из https://github.com/github/ruby.git
Progress on x86 assembler. Encode a few simple instructions.
This commit is contained in:
Родитель
5cf7ccd24a
Коммит
8f40a62647
|
@ -151,7 +151,6 @@ COMMONOBJS = array.$(OBJEXT) \
|
|||
vm_sync.$(OBJEXT) \
|
||||
vm_trace.$(OBJEXT) \
|
||||
ujit_asm.$(OBJEXT) \
|
||||
ujit_asm_tests.$(OBJEXT) \
|
||||
$(COROUTINE_OBJ) \
|
||||
$(DTRACE_OBJ) \
|
||||
$(BUILTIN_ENCOBJS) \
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
# NOTE: I did not know what would be the sensible way to compile
|
||||
# and run these tests from the Ruby makefile
|
||||
|
||||
clang -std=c99 -Wall ujit_asm.c ujit_asm_tests.c -o asm_test
|
||||
clear
|
||||
|
||||
clang -std=gnu99 -Wall ujit_asm.c ujit_asm_tests.c -o asm_test
|
||||
|
||||
./asm_test
|
||||
|
||||
rm asm_test
|
||||
|
|
84
ujit_asm.c
84
ujit_asm.c
|
@ -11,6 +11,24 @@
|
|||
// TODO: give ujit_examples.h some more meaningful file name
|
||||
#include "ujit_examples.h"
|
||||
|
||||
// 64-bit GP registers
|
||||
const x86opnd_t RAX = { OPND_REG, 64, .reg = { REG_GP, 0 }};
|
||||
const x86opnd_t RCX = { OPND_REG, 64, .reg = { REG_GP, 1 }};
|
||||
const x86opnd_t RDX = { OPND_REG, 64, .reg = { REG_GP, 2 }};
|
||||
const x86opnd_t RBX = { OPND_REG, 64, .reg = { REG_GP, 3 }};
|
||||
const x86opnd_t RSP = { OPND_REG, 64, .reg = { REG_GP, 4 }};
|
||||
const x86opnd_t RBP = { OPND_REG, 64, .reg = { REG_GP, 5 }};
|
||||
const x86opnd_t RSI = { OPND_REG, 64, .reg = { REG_GP, 6 }};
|
||||
const x86opnd_t RDI = { OPND_REG, 64, .reg = { REG_GP, 7 }};
|
||||
const x86opnd_t R8 = { OPND_REG, 64, .reg = { REG_GP, 8 }};
|
||||
const x86opnd_t R9 = { OPND_REG, 64, .reg = { REG_GP, 9 }};
|
||||
const x86opnd_t R10 = { OPND_REG, 64, .reg = { REG_GP, 10 }};
|
||||
const x86opnd_t R11 = { OPND_REG, 64, .reg = { REG_GP, 11 }};
|
||||
const x86opnd_t R12 = { OPND_REG, 64, .reg = { REG_GP, 12 }};
|
||||
const x86opnd_t R13 = { OPND_REG, 64, .reg = { REG_GP, 13 }};
|
||||
const x86opnd_t R14 = { OPND_REG, 64, .reg = { REG_GP, 14 }};
|
||||
const x86opnd_t R15 = { OPND_REG, 64, .reg = { REG_GP, 15 }};
|
||||
|
||||
void cb_init(codeblock_t* cb, size_t mem_size)
|
||||
{
|
||||
// Map the memory as executable
|
||||
|
@ -36,6 +54,15 @@ void cb_init(codeblock_t* cb, size_t mem_size)
|
|||
cb->num_refs = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
Set the current write position
|
||||
*/
|
||||
void cb_set_pos(codeblock_t* cb, size_t pos)
|
||||
{
|
||||
assert (pos < cb->mem_size);
|
||||
cb->write_pos = pos;
|
||||
}
|
||||
|
||||
// Get a direct pointer into the executable memory block
|
||||
uint8_t* cb_get_ptr(codeblock_t* cb, size_t index)
|
||||
{
|
||||
|
@ -128,8 +155,27 @@ void cb_write_epilogue(codeblock_t* cb)
|
|||
cb_write_byte(cb, ujit_post_call_bytes[i]);
|
||||
}
|
||||
|
||||
// Check if an operand needs a rex byte to be encoded
|
||||
bool rex_needed(x86opnd_t opnd)
|
||||
{
|
||||
if (opnd.type == OPND_REG)
|
||||
{
|
||||
return (
|
||||
opnd.reg.reg_no > 7 ||
|
||||
(opnd.num_bits == 8 && opnd.reg.reg_no >= 4 && opnd.reg.reg_no <= 7)
|
||||
);
|
||||
}
|
||||
|
||||
if (opnd.type == OPND_MEM)
|
||||
{
|
||||
return (opnd.mem.base_reg_no > 7) || (opnd.mem.has_idx && opnd.mem.idx_reg_no > 7);
|
||||
}
|
||||
|
||||
assert (false);
|
||||
}
|
||||
|
||||
// Write the REX byte
|
||||
void writeREX(
|
||||
static void cb_write_rex(
|
||||
codeblock_t* cb,
|
||||
bool w_flag,
|
||||
uint8_t reg_no,
|
||||
|
@ -153,13 +199,12 @@ void writeREX(
|
|||
}
|
||||
|
||||
// Write an opcode byte with an embedded register operand
|
||||
/*static void cb_write_opcode(codeblock_t* cb, uint8_t opcode, X86Reg rOpnd)
|
||||
static void cb_write_opcode(codeblock_t* cb, uint8_t opcode, x86opnd_t reg)
|
||||
{
|
||||
// Write the reg field into the opcode byte
|
||||
uint8_t op_byte = opcode | (rOpnd.regNo & 7);
|
||||
uint8_t op_byte = opcode | (reg.reg.reg_no & 7);
|
||||
cb_write_byte(cb, op_byte);
|
||||
}
|
||||
*/
|
||||
|
||||
// nop - Noop, one or multiple bytes long
|
||||
void nop(codeblock_t* cb, size_t length)
|
||||
|
@ -228,28 +273,35 @@ void nop(codeblock_t* cb, size_t length)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
/// push - Push a register on the stack
|
||||
void push(codeblock_t* cb, X86Reg reg)
|
||||
void push(codeblock_t* cb, x86opnd_t reg)
|
||||
{
|
||||
assert (reg.size is 64, "can only push 64-bit registers");
|
||||
assert (reg.num_bits == 64);
|
||||
|
||||
//cb.writeASM("push", reg);
|
||||
|
||||
if (reg.rexNeeded)
|
||||
cb_write_rex(cb, false, 0, 0, reg.regNo);
|
||||
cb_write_byte(cb, 0x50, reg);
|
||||
if (rex_needed(reg))
|
||||
cb_write_rex(cb, false, 0, 0, reg.reg.reg_no);
|
||||
|
||||
cb_write_opcode(cb, 0x50, reg);
|
||||
}
|
||||
|
||||
/// pop - Pop a register off the stack
|
||||
void pop(codeblock_t* cb, X86Reg reg)
|
||||
void pop(codeblock_t* cb, x86opnd_t reg)
|
||||
{
|
||||
assert (reg.size is 64);
|
||||
assert (reg.num_bits == 64);
|
||||
|
||||
//cb.writeASM("pop", reg);
|
||||
|
||||
if (reg.rexNeeded)
|
||||
cb_write_rex(false, 0, 0, reg.regNo);
|
||||
cb_write_byte(cb, 0x58, reg);
|
||||
if (rex_needed(reg))
|
||||
cb_write_rex(cb, false, 0, 0, reg.reg.reg_no);
|
||||
|
||||
cb_write_opcode(cb, 0x58, reg);
|
||||
}
|
||||
|
||||
/// ret - Return from call, popping only the return address
|
||||
void ret(codeblock_t* cb)
|
||||
{
|
||||
//cb.writeASM("ret");
|
||||
cb_write_byte(cb, 0xC3);
|
||||
}
|
||||
*/
|
||||
|
|
96
ujit_asm.h
96
ujit_asm.h
|
@ -11,9 +11,10 @@
|
|||
// Maximum number of label references
|
||||
#define MAX_LABEL_REFS 32
|
||||
|
||||
// Reference to an ASM label
|
||||
typedef struct LabelRef
|
||||
{
|
||||
// Position where the label reference is in the code block
|
||||
// Position in the code block where the label reference exists
|
||||
size_t pos;
|
||||
|
||||
// Label which this refers to
|
||||
|
@ -21,6 +22,7 @@ typedef struct LabelRef
|
|||
|
||||
} labelref_t;
|
||||
|
||||
// Block of executable memory into which instructions can be written
|
||||
typedef struct CodeBlock
|
||||
{
|
||||
// Memory block
|
||||
|
@ -51,15 +53,101 @@ typedef struct CodeBlock
|
|||
|
||||
} codeblock_t;
|
||||
|
||||
// Kind of an instruction operand (the value stored in x86opnd_t.type)
enum OpndType
{
    OPND_NONE,
    OPND_REG,   // Register operand
    OPND_IMM,   // Immediate operand
    OPND_MEM,   // Memory operand
    OPND_IPREL  // NOTE(review): presumably IP-relative; no use visible here
};
|
||||
|
||||
// Class of a register (the value stored in x86reg_t.reg_type)
enum RegType
{
    REG_GP,   // General-purpose register
    REG_FP,   // NOTE(review): presumably floating-point; no use visible here
    REG_XMM,  // NOTE(review): presumably an XMM register; no use visible here
    REG_IP    // NOTE(review): presumably the instruction pointer; no use visible here
};
|
||||
|
||||
// Register part of an operand: which class of register and which one
typedef struct X86Reg
{
    // Register type (an enum RegType value stored in one byte)
    uint8_t reg_type;

    // Register index number
    uint8_t reg_no;

} x86reg_t;
|
||||
|
||||
// Memory part of an operand: base register, optional scaled index register
// and a constant displacement
typedef struct X86Mem
{
    /// Base register number
    uint8_t base_reg_no;

    /// Index register number (only meaningful when has_idx is set)
    uint8_t idx_reg_no;

    /// SIB scale exponent value (power of two, two bits)
    uint8_t scale_exp;

    /// Has index register flag
    bool has_idx;

    // FIXME: do we need this, or can base reg just be RIP?
    /// IP-relative addressing flag
    bool is_iprel;

    /// Constant displacement from the base, not scaled
    int32_t disp;

} x86mem_t;
|
||||
|
||||
typedef struct X86Opnd
|
||||
{
|
||||
// Operand type
|
||||
uint8_t type;
|
||||
|
||||
// Size in bits
|
||||
uint16_t num_bits;
|
||||
|
||||
union
|
||||
{
|
||||
// Register operand
|
||||
x86reg_t reg;
|
||||
|
||||
// Memory operand
|
||||
x86mem_t mem;
|
||||
|
||||
// Signed immediate value
|
||||
int64_t imm;
|
||||
|
||||
// Unsigned immediate value
|
||||
uint64_t unsgImm;
|
||||
};
|
||||
|
||||
} x86opnd_t;
|
||||
|
||||
// 64-bit GP registers
|
||||
const x86opnd_t RAX;
|
||||
const x86opnd_t RCX;
|
||||
const x86opnd_t RDX;
|
||||
const x86opnd_t RBX;
|
||||
const x86opnd_t RBP;
|
||||
const x86opnd_t RSP;
|
||||
const x86opnd_t RSI;
|
||||
const x86opnd_t RDI;
|
||||
const x86opnd_t R8;
|
||||
const x86opnd_t R9;
|
||||
const x86opnd_t R10;
|
||||
const x86opnd_t R11;
|
||||
const x86opnd_t R12;
|
||||
const x86opnd_t R13;
|
||||
const x86opnd_t R14;
|
||||
const x86opnd_t R15;
|
||||
|
||||
void cb_init(codeblock_t* cb, size_t mem_size);
|
||||
void cb_set_pos(codeblock_t* cb, size_t pos);
|
||||
uint8_t* cb_get_ptr(codeblock_t* cb, size_t index);
|
||||
void cb_write_byte(codeblock_t* cb, uint8_t byte);
|
||||
void cb_write_bytes(codeblock_t* cb, size_t num_bytes, ...);
|
||||
|
@ -69,7 +157,13 @@ void cb_write_int(codeblock_t* cb, uint64_t val, size_t num_bits);
|
|||
void cb_write_prologue(codeblock_t* cb);
|
||||
void cb_write_epilogue(codeblock_t* cb);
|
||||
|
||||
// Encode individual instructions into a code block
|
||||
void nop(codeblock_t* cb, size_t length);
|
||||
void push(codeblock_t* cb, x86opnd_t reg);
|
||||
void pop(codeblock_t* cb, x86opnd_t reg);
|
||||
void ret(codeblock_t* cb);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,27 +1,68 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "ujit_asm.h"
|
||||
|
||||
//fprintf(stderr, format);
|
||||
//exit(-1)
|
||||
// Check that the code block contains the given sequence of bytes
|
||||
void check_bytes(codeblock_t* cb, const char* bytes)
|
||||
{
|
||||
printf("checking encoding: %s\n", bytes);
|
||||
|
||||
// TODO: make a macro to test encoding sequences
|
||||
// ***You can use sizeof to know the length***
|
||||
// CHECK_BYTES(cb, {})
|
||||
size_t len = strlen(bytes);
|
||||
assert (len % 2 == 0);
|
||||
size_t num_bytes = len / 2;
|
||||
|
||||
if (cb->write_pos != num_bytes)
|
||||
{
|
||||
fprintf(stderr, "incorrect encoding length %ld\n", cb->write_pos);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_bytes; ++i)
|
||||
{
|
||||
char byte_str[] = {0, 0, 0, 0};
|
||||
strncpy(byte_str, bytes + (2 * i), 2);
|
||||
//printf("%ld: %s\n", i, byte_str);
|
||||
|
||||
char* endptr;
|
||||
long int byte = strtol(byte_str, &endptr, 16);
|
||||
|
||||
uint8_t cb_byte = cb->mem_block[i];
|
||||
|
||||
if (cb_byte != byte)
|
||||
{
|
||||
fprintf(stderr, "incorrect encoding at position %ld\n", i);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void run_tests()
|
||||
{
|
||||
printf("Running assembler tests\n");
|
||||
|
||||
codeblock_t cb;
|
||||
cb_init(&cb, 4096);
|
||||
codeblock_t cb_obj;
|
||||
codeblock_t* cb = &cb_obj;
|
||||
cb_init(cb, 4096);
|
||||
cb_write_prologue(cb);
|
||||
cb_write_epilogue(cb);
|
||||
|
||||
// pop
|
||||
cb_set_pos(cb, 0); pop(cb, RAX); check_bytes(cb, "58");
|
||||
cb_set_pos(cb, 0); pop(cb, RBX); check_bytes(cb, "5B");
|
||||
cb_set_pos(cb, 0); pop(cb, RSP); check_bytes(cb, "5C");
|
||||
cb_set_pos(cb, 0); pop(cb, RBP); check_bytes(cb, "5D");
|
||||
cb_set_pos(cb, 0); pop(cb, R12); check_bytes(cb, "415C");
|
||||
|
||||
// push
|
||||
cb_set_pos(cb, 0); push(cb, RAX); check_bytes(cb, "50");
|
||||
cb_set_pos(cb, 0); push(cb, RBX); check_bytes(cb, "53");
|
||||
cb_set_pos(cb, 0); push(cb, R12); check_bytes(cb, "4154");
|
||||
|
||||
// ret
|
||||
cb_set_pos(cb, 0); ret(cb); check_bytes(cb, "C3");
|
||||
|
||||
cb_write_prologue(&cb);
|
||||
cb_write_epilogue(&cb);
|
||||
|
||||
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче