py: Reorganise bytecode layout so it's more structured, easier to edit.
diff --git a/py/bc.c b/py/bc.c
index 3e9125d..a4ee847 100644
--- a/py/bc.c
+++ b/py/bc.c
@@ -84,10 +84,8 @@
 
 // On entry code_state should be allocated somewhere (stack/heap) and
 // contain the following valid entries:
-//    - code_state->code_info should be the offset in bytes from the start of
-//      the bytecode chunk to the start of the code-info within the bytecode
 //    - code_state->ip should contain the offset in bytes from the start of
-//      the bytecode chunk to the start of the prelude within the bytecode
+//      the bytecode chunk to just after n_state and n_exc_stack
 //    - code_state->n_state should be set to the state size (locals plus stack)
 void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) {
     // This function is pretty complicated.  It's main aim is to be efficient in speed and RAM
@@ -95,10 +93,16 @@
     mp_obj_fun_bc_t *self = self_in;
     mp_uint_t n_state = code_state->n_state;
 
+    // ip comes in as an offset into bytecode, so turn it into a true pointer
+    code_state->ip = self->bytecode + (mp_uint_t)code_state->ip;
+
     #if MICROPY_STACKLESS
     code_state->prev = NULL;
     #endif
-    code_state->code_info = self->bytecode + (mp_uint_t)code_state->code_info;
+
+    // align ip
+    code_state->ip = MP_ALIGN(code_state->ip, sizeof(mp_uint_t));
+
     code_state->sp = &code_state->state[0] - 1;
     code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1;
 
@@ -156,13 +160,8 @@
             *var_pos_kw_args = dict;
         }
 
-        // get pointer to arg_names array at start of bytecode prelude
-        const mp_obj_t *arg_names;
-        {
-            const byte *code_info = code_state->code_info;
-            mp_uint_t code_info_size = mp_decode_uint(&code_info);
-            arg_names = (const mp_obj_t*)(code_state->code_info + code_info_size);
-        }
+        // get pointer to arg_names array
+        const mp_obj_t *arg_names = (const mp_obj_t*)code_state->ip;
 
         for (mp_uint_t i = 0; i < n_kw; i++) {
             mp_obj_t wanted_arg_name = kwargs[2 * i];
@@ -235,8 +234,19 @@
         }
     }
 
+    // get the ip and skip argument names
+    const byte *ip = code_state->ip;
+    ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_uint_t);
+
+    // store pointer to code_info and jump over it
+    {
+        code_state->code_info = ip;
+        const byte *ip2 = ip;
+        mp_uint_t code_info_size = mp_decode_uint(&ip2);
+        ip += code_info_size;
+    }
+
     // bytecode prelude: initialise closed over variables
-    const byte *ip = self->bytecode + (mp_uint_t)code_state->ip;
     mp_uint_t local_num;
     while ((local_num = *ip++) != 255) {
         code_state->state[n_state - 1 - local_num] =
diff --git a/py/bc.h b/py/bc.h
index b4b4d8c..73b67bc 100644
--- a/py/bc.h
+++ b/py/bc.h
@@ -29,6 +29,30 @@
 #include "py/runtime.h"
 #include "py/obj.h"
 
+// bytecode layout:
+//
+//  n_state         : var uint
+//  n_exc_stack     : var uint
+//
+//  <word alignment padding>
+//
+//  argname0        : obj (qstr)
+//  ...             : obj (qstr)
+//  argnameN        : obj (qstr)    N = num_pos_args + num_kwonly_args
+//
+//  code_info_size  : var uint |    code_info_size counts bytes in this chunk
+//  simple_name     : var qstr |
+//  source_file     : var qstr |
+//  <line number info>         |
+//  <word alignment padding>   |
+//
+//  local_num0      : byte     |    indices of locals that are cells
+//  ...             : byte     |
+//  local_numN      : byte     |
+//  255             : byte          end-of-list marker
+//
+//  <bytecode>
+
 // Exception stack entry
 typedef struct _mp_exc_stack {
     const byte *handler;
diff --git a/py/emitbc.c b/py/emitbc.c
index 71ed4af..0ed7828 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -111,7 +111,11 @@
     emit->code_info_offset = (emit->code_info_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1));
 }
 
-STATIC void emit_write_code_info_uint(emit_t *emit, mp_uint_t val) {
+STATIC void emit_write_code_info_byte(emit_t* emit, byte val) {
+    *emit_get_cur_to_write_code_info(emit, 1) = val;
+}
+
+STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) {
     emit_write_uint(emit, emit_get_cur_to_write_code_info, val);
 }
 
@@ -119,6 +123,13 @@
     emit_write_uint(emit, emit_get_cur_to_write_code_info, qst);
 }
 
+STATIC void emit_write_code_info_prealigned_ptr(emit_t* emit, void *ptr) {
+    mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_code_info(emit, sizeof(mp_uint_t));
+    // Verify that c is already uint-aligned
+    assert(c == MP_ALIGN(c, sizeof(mp_uint_t)));
+    *c = (mp_uint_t)ptr;
+}
+
 #if MICROPY_ENABLE_SOURCE_LINE
 STATIC void emit_write_code_info_bytes_lines(emit_t *emit, mp_uint_t bytes_to_skip, mp_uint_t lines_to_skip) {
     assert(bytes_to_skip > 0 || lines_to_skip > 0);
@@ -167,11 +178,7 @@
     c[0] = b1;
 }
 
-STATIC void emit_write_bytecode_uint(emit_t *emit, mp_uint_t val) {
-    emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
-}
-
-STATIC void emit_write_bytecode_byte_byte(emit_t *emit, byte b1, byte b2) {
+STATIC void emit_write_bytecode_byte_byte(emit_t* emit, byte b1, byte b2) {
     assert((b2 & (~0xff)) == 0);
     byte *c = emit_get_cur_to_write_bytecode(emit, 2);
     c[0] = b1;
@@ -210,13 +217,6 @@
     emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
 }
 
-STATIC void emit_write_bytecode_prealigned_ptr(emit_t *emit, void *ptr) {
-    mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t));
-    // Verify thar c is already uint-aligned
-    assert(c == MP_ALIGN(c, sizeof(mp_uint_t)));
-    *c = (mp_uint_t)ptr;
-}
-
 // aligns the pointer so it is friendly to GC
 STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) {
     emit_write_bytecode_byte(emit, b);
@@ -227,15 +227,7 @@
     *c = (mp_uint_t)ptr;
 }
 
-/* currently unused
-STATIC void emit_write_bytecode_byte_uint_uint(emit_t *emit, byte b, mp_uint_t num1, mp_uint_t num2) {
-    emit_write_bytecode_byte(emit, b);
-    emit_write_bytecode_byte_uint(emit, num1);
-    emit_write_bytecode_byte_uint(emit, num2);
-}
-*/
-
-STATIC void emit_write_bytecode_byte_qstr(emit_t *emit, byte b, qstr qst) {
+STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) {
     emit_write_bytecode_byte_uint(emit, b, qst);
 }
 
@@ -289,19 +281,26 @@
     emit->bytecode_offset = 0;
     emit->code_info_offset = 0;
 
-    // Write code info size as compressed uint.  If we are not in the final pass
-    // then space for this uint is reserved in emit_bc_end_pass.
-    if (pass == MP_PASS_EMIT) {
-        emit_write_code_info_uint(emit, emit->code_info_size);
+    // Write local state size and exception stack size.
+    {
+        mp_uint_t n_state = scope->num_locals + scope->stack_size;
+        if (n_state == 0) {
+            // Need at least 1 entry in the state, in the case an exception is
+            // propagated through this function, the exception is returned in
+            // the highest slot in the state (fastn[0], see vm.c).
+            n_state = 1;
+        }
+        emit_write_code_info_uint(emit, n_state);
+        emit_write_code_info_uint(emit, scope->exc_stack_size);
     }
 
-    // write the name and source file of this function
-    emit_write_code_info_qstr(emit, scope->simple_name);
-    emit_write_code_info_qstr(emit, scope->source_file);
+    // Align code-info so that following pointers are aligned on a machine word.
+    emit_align_code_info_to_machine_word(emit);
 
-    // bytecode prelude: argument names (needed to resolve positional args passed as keywords)
-    // we store them as full word-sized objects for efficient access in mp_setup_code_state
-    // this is the start of the prelude and is guaranteed to be aligned on a word boundary
+    // Write argument names (needed to resolve positional args passed as
+    // keywords).  We store them as full word-sized objects for efficient access
+    // in mp_setup_code_state.  This is the start of the prelude and is
+    // guaranteed to be aligned on a word boundary.
     {
         // For a given argument position (indexed by i) we need to find the
         // corresponding id_info which is a parameter, as it has the correct
@@ -322,23 +321,23 @@
                     break;
                 }
             }
-            emit_write_bytecode_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst));
+            emit_write_code_info_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst));
         }
     }
 
-    // bytecode prelude: local state size and exception stack size
-    {
-        mp_uint_t n_state = scope->num_locals + scope->stack_size;
-        if (n_state == 0) {
-            // Need at least 1 entry in the state, in the case an exception is
-            // propagated through this function, the exception is returned in
-            // the highest slot in the state (fastn[0], see vm.c).
-            n_state = 1;
-        }
-        emit_write_bytecode_uint(emit, n_state);
-        emit_write_bytecode_uint(emit, scope->exc_stack_size);
+    // Write size of the rest of the code info.  We don't know how big this
+    // variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes
+    // for it and hope that is enough!  TODO assert this or something.
+    if (pass == MP_PASS_EMIT) {
+        emit_write_code_info_uint(emit, emit->code_info_size - emit->code_info_offset);
+    } else  {
+        emit_get_cur_to_write_code_info(emit, 2);
     }
 
+    // Write the name and source file of this function.
+    emit_write_code_info_qstr(emit, scope->simple_name);
+    emit_write_code_info_qstr(emit, scope->source_file);
+
     // bytecode prelude: initialise closed over variables
     for (int i = 0; i < scope->id_info_len; i++) {
         id_info_t *id = &scope->id_info[i];
@@ -360,25 +359,10 @@
         mp_printf(&mp_plat_print, "ERROR: stack size not back to zero; got %d\n", emit->stack_size);
     }
 
-    *emit_get_cur_to_write_code_info(emit, 1) = 0; // end of line number info
+    emit_write_code_info_byte(emit, 0); // end of line number info
 
     if (emit->pass == MP_PASS_CODE_SIZE) {
-        // Need to make sure we have enough room in the code-info block to write
-        // the size of the code-info block.  Since the size is written as a
-        // compressed uint, we don't know its size until we write it!  Thus, we
-        // take the biggest possible value it could be and write that here.
-        // Then there will be enough room to write the value, and any leftover
-        // space will be absorbed in the alignment at the end of the code-info
-        // block.
-        mp_uint_t max_code_info_size =
-            emit->code_info_offset  // current code-info size
-            + BYTES_FOR_INT         // maximum space for compressed uint
-            + BYTES_PER_WORD - 1;   // maximum space for alignment padding
-        emit_write_code_info_uint(emit, max_code_info_size);
-
-        // Align code-info so that following bytecode is aligned on a machine word.
-        // We don't need to write anything here, it's just dead space between the
-        // code-info block and the bytecode block that follows it.
+        // Pad the code-info so that the bytecode following it is word-aligned.
         emit_align_code_info_to_machine_word(emit);
 
         // calculate size of total code-info + bytecode, in bytes
diff --git a/py/emitnative.c b/py/emitnative.c
index d0614d3..1fcc843 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -566,8 +566,6 @@
     stack_info_t *stack_info;
     vtype_kind_t saved_stack_vtype;
 
-    int code_info_size;
-    int code_info_offset;
     int prelude_offset;
     int n_state;
     int stack_start;
@@ -774,10 +772,6 @@
         ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_1);
         #endif
 
-        // set code_state.code_info (offset from start of this function to code_info data)
-        // XXX this encoding may change size
-        ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->code_info_offset, offsetof(mp_code_state, code_info) / sizeof(mp_uint_t), REG_ARG_1);
-
         // set code_state.ip (offset from start of this function to prelude info)
         // XXX this encoding may change size
         ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state, ip) / sizeof(mp_uint_t), REG_ARG_1);
@@ -829,11 +823,10 @@
     }
 
     if (!emit->do_viper_types) {
-        // write dummy code info (for mp_setup_code_state to parse) and arg names
-        emit->code_info_offset = ASM_GET_CODE_POS(emit->as);
-        ASM_DATA(emit->as, 1, emit->code_info_size);
+        emit->prelude_offset = ASM_GET_CODE_POS(emit->as);
         ASM_ALIGN(emit->as, ASM_WORD_SIZE);
-        emit->code_info_size = ASM_GET_CODE_POS(emit->as) - emit->code_info_offset;
+
+        // write argument names as qstr objects
         // see comment in corresponding part of emitbc.c about the logic here
         for (int i = 0; i < emit->scope->num_pos_args + emit->scope->num_kwonly_args; i++) {
             qstr qst = MP_QSTR__star_;
@@ -847,8 +840,10 @@
             ASM_DATA(emit->as, ASM_WORD_SIZE, (mp_uint_t)MP_OBJ_NEW_QSTR(qst));
         }
 
+        // write dummy code info (for mp_setup_code_state to parse)
+        ASM_DATA(emit->as, 1, 1);
+
         // bytecode prelude: initialise closed over variables
-        emit->prelude_offset = ASM_GET_CODE_POS(emit->as);
         for (int i = 0; i < emit->scope->id_info_len; i++) {
             id_info_t *id = &emit->scope->id_info[i];
             if (id->kind == ID_INFO_KIND_CELL) {
diff --git a/py/objfun.c b/py/objfun.c
index 774d8e64..53ddb0a 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -121,8 +121,13 @@
         return MP_QSTR_;
     }
     #endif
-    const byte *code_info = fun->bytecode;
-    return mp_obj_code_get_name(code_info);
+
+    const byte *bc = fun->bytecode;
+    mp_decode_uint(&bc); // skip n_state
+    mp_decode_uint(&bc); // skip n_exc_stack
+    bc = MP_ALIGN(bc, sizeof(mp_uint_t)); // align
+    bc += (fun->n_pos_args + fun->n_kwonly_args) * sizeof(mp_uint_t); // skip arg names
+    return mp_obj_code_get_name(bc);
 }
 
 #if MICROPY_CPYTHON_COMPAT
@@ -158,13 +163,8 @@
     MP_STACK_CHECK();
     mp_obj_fun_bc_t *self = self_in;
 
-    // skip code-info block
-    const byte *code_info = self->bytecode;
-    mp_uint_t code_info_size = mp_decode_uint(&code_info);
-    const byte *ip = self->bytecode + code_info_size;
-
-    // bytecode prelude: skip arg names
-    ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t);
+    // get start of bytecode
+    const byte *ip = self->bytecode;
 
     // bytecode prelude: state size and exception stack size
     mp_uint_t n_state = mp_decode_uint(&ip);
@@ -178,9 +178,8 @@
         return NULL;
     }
 
+    code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
     code_state->n_state = n_state;
-    code_state->code_info = 0; // offset to code-info
-    code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude
     mp_setup_code_state(code_state, self_in, n_args, n_kw, args);
 
     // execute the byte code with the correct globals context
@@ -202,13 +201,8 @@
     mp_obj_fun_bc_t *self = self_in;
     DEBUG_printf("Func n_def_args: %d\n", self->n_def_args);
 
-    // skip code-info block
-    const byte *code_info = self->bytecode;
-    mp_uint_t code_info_size = mp_decode_uint(&code_info);
-    const byte *ip = self->bytecode + code_info_size;
-
-    // bytecode prelude: skip arg names
-    ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t);
+    // get start of bytecode
+    const byte *ip = self->bytecode;
 
     // bytecode prelude: state size and exception stack size
     mp_uint_t n_state = mp_decode_uint(&ip);
@@ -229,9 +223,8 @@
         state_size = 0; // indicate that we allocated using alloca
     }
 
+    code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
     code_state->n_state = n_state;
-    code_state->code_info = 0; // offset to code-info
-    code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude
     mp_setup_code_state(code_state, self_in, n_args, n_kw, args);
 
     // execute the byte code with the correct globals context
diff --git a/py/objgenerator.c b/py/objgenerator.c
index 4ba6bf7..f7b637e 100644
--- a/py/objgenerator.c
+++ b/py/objgenerator.c
@@ -54,13 +54,8 @@
     mp_obj_fun_bc_t *self_fun = (mp_obj_fun_bc_t*)self->fun;
     assert(MP_OBJ_IS_TYPE(self_fun, &mp_type_fun_bc));
 
-    // skip code-info block
-    const byte *code_info = self_fun->bytecode;
-    mp_uint_t code_info_size = mp_decode_uint(&code_info);
-    const byte *ip = self_fun->bytecode + code_info_size;
-
-    // bytecode prelude: skip arg names
-    ip += (self_fun->n_pos_args + self_fun->n_kwonly_args) * sizeof(mp_obj_t);
+    // get start of bytecode
+    const byte *ip = self_fun->bytecode;
 
     // bytecode prelude: get state size and exception stack size
     mp_uint_t n_state = mp_decode_uint(&ip);
diff --git a/py/showbc.c b/py/showbc.c
index 2da8d3f..87e7c6a 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -57,7 +57,16 @@
 void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip, mp_uint_t len) {
     mp_showbc_code_start = ip;
 
-    // get code info size
+    // get state size and exception stack size
+    mp_uint_t n_state = mp_decode_uint(&ip);
+    mp_uint_t n_exc_stack = mp_decode_uint(&ip);
+
+    ip = MP_ALIGN(ip, sizeof(mp_uint_t));
+
+    // get and skip arg names
+    const mp_obj_t *arg_names = (const mp_obj_t*)ip;
+    ip += n_total_args * sizeof(mp_uint_t);
+
     const byte *code_info = ip;
     mp_uint_t code_info_size = mp_decode_uint(&code_info);
     ip += code_info_size;
@@ -65,7 +74,7 @@
     qstr block_name = mp_decode_uint(&code_info);
     qstr source_file = mp_decode_uint(&code_info);
     printf("File %s, code block '%s' (descriptor: %p, bytecode @%p " UINT_FMT " bytes)\n",
-        qstr_str(source_file), qstr_str(block_name), descr, code_info, len);
+        qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len);
 
     // raw bytecode dump
     printf("Raw bytecode (code_info_size=" UINT_FMT ", bytecode_size=" UINT_FMT "):\n", code_info_size, len - code_info_size);
@@ -80,18 +89,15 @@
     // bytecode prelude: arg names (as qstr objects)
     printf("arg names:");
     for (mp_uint_t i = 0; i < n_total_args; i++) {
-        printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(*(mp_obj_t*)ip)));
-        ip += sizeof(mp_obj_t);
+        printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(arg_names[i])));
     }
     printf("\n");
 
-    // bytecode prelude: state size and exception stack size; 16 bit uints
-    {
-        uint n_state = mp_decode_uint(&ip);
-        uint n_exc_stack = mp_decode_uint(&ip);
-        printf("(N_STATE %u)\n", n_state);
-        printf("(N_EXC_STACK %u)\n", n_exc_stack);
-    }
+    printf("(N_STATE " UINT_FMT ")\n", n_state);
+    printf("(N_EXC_STACK " UINT_FMT ")\n", n_exc_stack);
+
+    // for printing line number info
+    const byte *bytecode_start = ip;
 
     // bytecode prelude: initialise closed over variables
     {
@@ -104,7 +110,7 @@
 
     // print out line number info
     {
-        mp_int_t bc = (mp_showbc_code_start + code_info_size) - ip; // start counting from the prelude
+        mp_int_t bc = bytecode_start - ip;
         mp_uint_t source_line = 1;
         printf("  bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
         for (const byte* ci = code_info; *ci;) {