py: Reorganise bytecode layout so it's more structured, easier to edit.
diff --git a/py/bc.c b/py/bc.c
index 3e9125d..a4ee847 100644
--- a/py/bc.c
+++ b/py/bc.c
@@ -84,10 +84,8 @@
// On entry code_state should be allocated somewhere (stack/heap) and
// contain the following valid entries:
-// - code_state->code_info should be the offset in bytes from the start of
-// the bytecode chunk to the start of the code-info within the bytecode
// - code_state->ip should contain the offset in bytes from the start of
-// the bytecode chunk to the start of the prelude within the bytecode
+// the bytecode chunk to just after n_state and n_exc_stack
// - code_state->n_state should be set to the state size (locals plus stack)
void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) {
// This function is pretty complicated. It's main aim is to be efficient in speed and RAM
@@ -95,10 +93,16 @@
mp_obj_fun_bc_t *self = self_in;
mp_uint_t n_state = code_state->n_state;
+ // ip comes in as an offset into bytecode, so turn it into a true pointer
+ code_state->ip = self->bytecode + (mp_uint_t)code_state->ip;
+
#if MICROPY_STACKLESS
code_state->prev = NULL;
#endif
- code_state->code_info = self->bytecode + (mp_uint_t)code_state->code_info;
+
+ // align ip to a machine word so it points at the start of the arg names
+ code_state->ip = MP_ALIGN(code_state->ip, sizeof(mp_uint_t));
+
code_state->sp = &code_state->state[0] - 1;
code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1;
@@ -156,13 +160,8 @@
*var_pos_kw_args = dict;
}
- // get pointer to arg_names array at start of bytecode prelude
- const mp_obj_t *arg_names;
- {
- const byte *code_info = code_state->code_info;
- mp_uint_t code_info_size = mp_decode_uint(&code_info);
- arg_names = (const mp_obj_t*)(code_state->code_info + code_info_size);
- }
+ // get pointer to arg_names array
+ const mp_obj_t *arg_names = (const mp_obj_t*)code_state->ip;
for (mp_uint_t i = 0; i < n_kw; i++) {
mp_obj_t wanted_arg_name = kwargs[2 * i];
@@ -235,8 +234,19 @@
}
}
+ // get the ip and skip argument names
+ const byte *ip = code_state->ip;
+ ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_uint_t);
+
+ // store pointer to code_info and jump over it
+ {
+ code_state->code_info = ip;
+ const byte *ip2 = ip;
+ mp_uint_t code_info_size = mp_decode_uint(&ip2);
+ ip += code_info_size;
+ }
+
// bytecode prelude: initialise closed over variables
- const byte *ip = self->bytecode + (mp_uint_t)code_state->ip;
mp_uint_t local_num;
while ((local_num = *ip++) != 255) {
code_state->state[n_state - 1 - local_num] =
diff --git a/py/bc.h b/py/bc.h
index b4b4d8c..73b67bc 100644
--- a/py/bc.h
+++ b/py/bc.h
@@ -29,6 +29,30 @@
#include "py/runtime.h"
#include "py/obj.h"
+// bytecode layout:
+//
+// n_state : var uint
+// n_exc_stack : var uint
+//
+// <word alignment padding>
+//
+// argname0 : obj (qstr)
+// ... : obj (qstr)
+// argnameN : obj (qstr) N = num_pos_args + num_kwonly_args
+//
+// code_info_size : var uint | code_info_size counts bytes in this chunk
+// simple_name : var qstr |
+// source_file : var qstr |
+// <line number info> |
+// <word alignment padding> |
+//
+// local_num0 : byte index of a local that is a closed-over cell
+// ... : byte
+// local_numN : byte
+// 255 : byte end-of-list sentinel
+//
+// <bytecode>
+
// Exception stack entry
typedef struct _mp_exc_stack {
const byte *handler;
diff --git a/py/emitbc.c b/py/emitbc.c
index 71ed4af..0ed7828 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -111,7 +111,11 @@
emit->code_info_offset = (emit->code_info_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1));
}
-STATIC void emit_write_code_info_uint(emit_t *emit, mp_uint_t val) {
+STATIC void emit_write_code_info_byte(emit_t* emit, byte val) {
+ *emit_get_cur_to_write_code_info(emit, 1) = val;
+}
+
+STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) {
emit_write_uint(emit, emit_get_cur_to_write_code_info, val);
}
@@ -119,6 +123,13 @@
emit_write_uint(emit, emit_get_cur_to_write_code_info, qst);
}
+STATIC void emit_write_code_info_prealigned_ptr(emit_t* emit, void *ptr) {
+ mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_code_info(emit, sizeof(mp_uint_t));
+ // Verify that c is already uint-aligned
+ assert(c == MP_ALIGN(c, sizeof(mp_uint_t)));
+ *c = (mp_uint_t)ptr;
+}
+
#if MICROPY_ENABLE_SOURCE_LINE
STATIC void emit_write_code_info_bytes_lines(emit_t *emit, mp_uint_t bytes_to_skip, mp_uint_t lines_to_skip) {
assert(bytes_to_skip > 0 || lines_to_skip > 0);
@@ -167,11 +178,7 @@
c[0] = b1;
}
-STATIC void emit_write_bytecode_uint(emit_t *emit, mp_uint_t val) {
- emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
-}
-
-STATIC void emit_write_bytecode_byte_byte(emit_t *emit, byte b1, byte b2) {
+STATIC void emit_write_bytecode_byte_byte(emit_t* emit, byte b1, byte b2) {
assert((b2 & (~0xff)) == 0);
byte *c = emit_get_cur_to_write_bytecode(emit, 2);
c[0] = b1;
@@ -210,13 +217,6 @@
emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
}
-STATIC void emit_write_bytecode_prealigned_ptr(emit_t *emit, void *ptr) {
- mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t));
- // Verify thar c is already uint-aligned
- assert(c == MP_ALIGN(c, sizeof(mp_uint_t)));
- *c = (mp_uint_t)ptr;
-}
-
// aligns the pointer so it is friendly to GC
STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) {
emit_write_bytecode_byte(emit, b);
@@ -227,15 +227,7 @@
*c = (mp_uint_t)ptr;
}
-/* currently unused
-STATIC void emit_write_bytecode_byte_uint_uint(emit_t *emit, byte b, mp_uint_t num1, mp_uint_t num2) {
- emit_write_bytecode_byte(emit, b);
- emit_write_bytecode_byte_uint(emit, num1);
- emit_write_bytecode_byte_uint(emit, num2);
-}
-*/
-
-STATIC void emit_write_bytecode_byte_qstr(emit_t *emit, byte b, qstr qst) {
+STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) {
emit_write_bytecode_byte_uint(emit, b, qst);
}
@@ -289,19 +281,26 @@
emit->bytecode_offset = 0;
emit->code_info_offset = 0;
- // Write code info size as compressed uint. If we are not in the final pass
- // then space for this uint is reserved in emit_bc_end_pass.
- if (pass == MP_PASS_EMIT) {
- emit_write_code_info_uint(emit, emit->code_info_size);
+ // Write local state size and exception stack size.
+ {
+ mp_uint_t n_state = scope->num_locals + scope->stack_size;
+ if (n_state == 0) {
+ // Need at least 1 entry in the state, in the case an exception is
+ // propagated through this function, the exception is returned in
+ // the highest slot in the state (fastn[0], see vm.c).
+ n_state = 1;
+ }
+ emit_write_code_info_uint(emit, n_state);
+ emit_write_code_info_uint(emit, scope->exc_stack_size);
}
- // write the name and source file of this function
- emit_write_code_info_qstr(emit, scope->simple_name);
- emit_write_code_info_qstr(emit, scope->source_file);
+ // Align code-info so that following pointers are aligned on a machine word.
+ emit_align_code_info_to_machine_word(emit);
- // bytecode prelude: argument names (needed to resolve positional args passed as keywords)
- // we store them as full word-sized objects for efficient access in mp_setup_code_state
- // this is the start of the prelude and is guaranteed to be aligned on a word boundary
+ // Write argument names (needed to resolve positional args passed as
+ // keywords). We store them as full word-sized objects for efficient access
+ // in mp_setup_code_state. This is the start of the prelude and is guaranteed
+ // to be aligned on a word boundary.
{
// For a given argument position (indexed by i) we need to find the
// corresponding id_info which is a parameter, as it has the correct
@@ -322,23 +321,23 @@
break;
}
}
- emit_write_bytecode_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst));
+ emit_write_code_info_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst));
}
}
- // bytecode prelude: local state size and exception stack size
- {
- mp_uint_t n_state = scope->num_locals + scope->stack_size;
- if (n_state == 0) {
- // Need at least 1 entry in the state, in the case an exception is
- // propagated through this function, the exception is returned in
- // the highest slot in the state (fastn[0], see vm.c).
- n_state = 1;
- }
- emit_write_bytecode_uint(emit, n_state);
- emit_write_bytecode_uint(emit, scope->exc_stack_size);
+ // Write size of the rest of the code info. We don't know how big this
+ // variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes
+ // for it and hope that is enough! TODO assert this or something.
+ if (pass == MP_PASS_EMIT) {
+ emit_write_code_info_uint(emit, emit->code_info_size - emit->code_info_offset);
+ } else {
+ emit_get_cur_to_write_code_info(emit, 2);
}
+ // Write the name and source file of this function.
+ emit_write_code_info_qstr(emit, scope->simple_name);
+ emit_write_code_info_qstr(emit, scope->source_file);
+
// bytecode prelude: initialise closed over variables
for (int i = 0; i < scope->id_info_len; i++) {
id_info_t *id = &scope->id_info[i];
@@ -360,25 +359,10 @@
mp_printf(&mp_plat_print, "ERROR: stack size not back to zero; got %d\n", emit->stack_size);
}
- *emit_get_cur_to_write_code_info(emit, 1) = 0; // end of line number info
+ emit_write_code_info_byte(emit, 0); // end of line number info
if (emit->pass == MP_PASS_CODE_SIZE) {
- // Need to make sure we have enough room in the code-info block to write
- // the size of the code-info block. Since the size is written as a
- // compressed uint, we don't know its size until we write it! Thus, we
- // take the biggest possible value it could be and write that here.
- // Then there will be enough room to write the value, and any leftover
- // space will be absorbed in the alignment at the end of the code-info
- // block.
- mp_uint_t max_code_info_size =
- emit->code_info_offset // current code-info size
- + BYTES_FOR_INT // maximum space for compressed uint
- + BYTES_PER_WORD - 1; // maximum space for alignment padding
- emit_write_code_info_uint(emit, max_code_info_size);
-
- // Align code-info so that following bytecode is aligned on a machine word.
- // We don't need to write anything here, it's just dead space between the
- // code-info block and the bytecode block that follows it.
+ // Pad the code-info so the bytecode that follows is aligned on a machine word.
emit_align_code_info_to_machine_word(emit);
// calculate size of total code-info + bytecode, in bytes
diff --git a/py/emitnative.c b/py/emitnative.c
index d0614d3..1fcc843 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -566,8 +566,6 @@
stack_info_t *stack_info;
vtype_kind_t saved_stack_vtype;
- int code_info_size;
- int code_info_offset;
int prelude_offset;
int n_state;
int stack_start;
@@ -774,10 +772,6 @@
ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_1);
#endif
- // set code_state.code_info (offset from start of this function to code_info data)
- // XXX this encoding may change size
- ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->code_info_offset, offsetof(mp_code_state, code_info) / sizeof(mp_uint_t), REG_ARG_1);
-
// set code_state.ip (offset from start of this function to prelude info)
// XXX this encoding may change size
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state, ip) / sizeof(mp_uint_t), REG_ARG_1);
@@ -829,11 +823,10 @@
}
if (!emit->do_viper_types) {
- // write dummy code info (for mp_setup_code_state to parse) and arg names
- emit->code_info_offset = ASM_GET_CODE_POS(emit->as);
- ASM_DATA(emit->as, 1, emit->code_info_size);
+ emit->prelude_offset = ASM_GET_CODE_POS(emit->as);
ASM_ALIGN(emit->as, ASM_WORD_SIZE);
- emit->code_info_size = ASM_GET_CODE_POS(emit->as) - emit->code_info_offset;
+
+ // write argument names as qstr objects
// see comment in corresponding part of emitbc.c about the logic here
for (int i = 0; i < emit->scope->num_pos_args + emit->scope->num_kwonly_args; i++) {
qstr qst = MP_QSTR__star_;
@@ -847,8 +840,10 @@
ASM_DATA(emit->as, ASM_WORD_SIZE, (mp_uint_t)MP_OBJ_NEW_QSTR(qst));
}
+ // write dummy code info (for mp_setup_code_state to parse)
+ ASM_DATA(emit->as, 1, 1);
+
// bytecode prelude: initialise closed over variables
- emit->prelude_offset = ASM_GET_CODE_POS(emit->as);
for (int i = 0; i < emit->scope->id_info_len; i++) {
id_info_t *id = &emit->scope->id_info[i];
if (id->kind == ID_INFO_KIND_CELL) {
diff --git a/py/objfun.c b/py/objfun.c
index 774d8e64..53ddb0a 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -121,8 +121,13 @@
return MP_QSTR_;
}
#endif
- const byte *code_info = fun->bytecode;
- return mp_obj_code_get_name(code_info);
+
+ const byte *bc = fun->bytecode;
+ mp_decode_uint(&bc); // skip n_state
+ mp_decode_uint(&bc); // skip n_exc_stack
+ bc = MP_ALIGN(bc, sizeof(mp_uint_t)); // align
+ bc += (fun->n_pos_args + fun->n_kwonly_args) * sizeof(mp_uint_t); // skip arg names
+ return mp_obj_code_get_name(bc);
}
#if MICROPY_CPYTHON_COMPAT
@@ -158,13 +163,8 @@
MP_STACK_CHECK();
mp_obj_fun_bc_t *self = self_in;
- // skip code-info block
- const byte *code_info = self->bytecode;
- mp_uint_t code_info_size = mp_decode_uint(&code_info);
- const byte *ip = self->bytecode + code_info_size;
-
- // bytecode prelude: skip arg names
- ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t);
+ // get start of bytecode
+ const byte *ip = self->bytecode;
// bytecode prelude: state size and exception stack size
mp_uint_t n_state = mp_decode_uint(&ip);
@@ -178,9 +178,8 @@
return NULL;
}
+ code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
code_state->n_state = n_state;
- code_state->code_info = 0; // offset to code-info
- code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude
mp_setup_code_state(code_state, self_in, n_args, n_kw, args);
// execute the byte code with the correct globals context
@@ -202,13 +201,8 @@
mp_obj_fun_bc_t *self = self_in;
DEBUG_printf("Func n_def_args: %d\n", self->n_def_args);
- // skip code-info block
- const byte *code_info = self->bytecode;
- mp_uint_t code_info_size = mp_decode_uint(&code_info);
- const byte *ip = self->bytecode + code_info_size;
-
- // bytecode prelude: skip arg names
- ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t);
+ // get start of bytecode
+ const byte *ip = self->bytecode;
// bytecode prelude: state size and exception stack size
mp_uint_t n_state = mp_decode_uint(&ip);
@@ -229,9 +223,8 @@
state_size = 0; // indicate that we allocated using alloca
}
+ code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
code_state->n_state = n_state;
- code_state->code_info = 0; // offset to code-info
- code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude
mp_setup_code_state(code_state, self_in, n_args, n_kw, args);
// execute the byte code with the correct globals context
diff --git a/py/objgenerator.c b/py/objgenerator.c
index 4ba6bf7..f7b637e 100644
--- a/py/objgenerator.c
+++ b/py/objgenerator.c
@@ -54,13 +54,8 @@
mp_obj_fun_bc_t *self_fun = (mp_obj_fun_bc_t*)self->fun;
assert(MP_OBJ_IS_TYPE(self_fun, &mp_type_fun_bc));
- // skip code-info block
- const byte *code_info = self_fun->bytecode;
- mp_uint_t code_info_size = mp_decode_uint(&code_info);
- const byte *ip = self_fun->bytecode + code_info_size;
-
- // bytecode prelude: skip arg names
- ip += (self_fun->n_pos_args + self_fun->n_kwonly_args) * sizeof(mp_obj_t);
+ // get start of bytecode
+ const byte *ip = self_fun->bytecode;
// bytecode prelude: get state size and exception stack size
mp_uint_t n_state = mp_decode_uint(&ip);
diff --git a/py/showbc.c b/py/showbc.c
index 2da8d3f..87e7c6a 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -57,7 +57,16 @@
void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip, mp_uint_t len) {
mp_showbc_code_start = ip;
- // get code info size
+ // get state size and exception stack size
+ mp_uint_t n_state = mp_decode_uint(&ip);
+ mp_uint_t n_exc_stack = mp_decode_uint(&ip);
+
+ ip = MP_ALIGN(ip, sizeof(mp_uint_t));
+
+ // get and skip arg names
+ const mp_obj_t *arg_names = (const mp_obj_t*)ip;
+ ip += n_total_args * sizeof(mp_uint_t);
+
const byte *code_info = ip;
mp_uint_t code_info_size = mp_decode_uint(&code_info);
ip += code_info_size;
@@ -65,7 +74,7 @@
qstr block_name = mp_decode_uint(&code_info);
qstr source_file = mp_decode_uint(&code_info);
printf("File %s, code block '%s' (descriptor: %p, bytecode @%p " UINT_FMT " bytes)\n",
- qstr_str(source_file), qstr_str(block_name), descr, code_info, len);
+ qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len);
// raw bytecode dump
printf("Raw bytecode (code_info_size=" UINT_FMT ", bytecode_size=" UINT_FMT "):\n", code_info_size, len - code_info_size);
@@ -80,18 +89,15 @@
// bytecode prelude: arg names (as qstr objects)
printf("arg names:");
for (mp_uint_t i = 0; i < n_total_args; i++) {
- printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(*(mp_obj_t*)ip)));
- ip += sizeof(mp_obj_t);
+ printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(arg_names[i])));
}
printf("\n");
- // bytecode prelude: state size and exception stack size; 16 bit uints
- {
- uint n_state = mp_decode_uint(&ip);
- uint n_exc_stack = mp_decode_uint(&ip);
- printf("(N_STATE %u)\n", n_state);
- printf("(N_EXC_STACK %u)\n", n_exc_stack);
- }
+ printf("(N_STATE " UINT_FMT ")\n", n_state);
+ printf("(N_EXC_STACK " UINT_FMT ")\n", n_exc_stack);
+
+ // for printing line number info
+ const byte *bytecode_start = ip;
// bytecode prelude: initialise closed over variables
{
@@ -104,7 +110,7 @@
// print out line number info
{
- mp_int_t bc = (mp_showbc_code_start + code_info_size) - ip; // start counting from the prelude
+ mp_int_t bc = bytecode_start - ip;
mp_uint_t source_line = 1;
printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
for (const byte* ci = code_info; *ci;) {