Initial commit.
diff --git a/py/.gitignore b/py/.gitignore
new file mode 100644
index 0000000..5761abc
--- /dev/null
+++ b/py/.gitignore
@@ -0,0 +1 @@
+*.o
diff --git a/py/Makefile b/py/Makefile
new file mode 100644
index 0000000..669453d
--- /dev/null
+++ b/py/Makefile
@@ -0,0 +1,50 @@
+CC = gcc
+CFLAGS = -Wall -ansi -std=gnu99 -Os #-DNDEBUG
+LDFLAGS =
+
+SRC = \
+	malloc.c \
+	misc.c \
+	qstr.c \
+	lexer.c \
+	lexerfile.c \
+	parse.c \
+	scope.c \
+	compile.c \
+	emitcommon.c \
+	emitcpy.c \
+	emitbc.c \
+	asmx64.c \
+	emitx64v2.c \
+	emitthumb.c \
+	asmthumb.c \
+	runtime.c \
+	bc.c \
+	main.c \
+
+SRC_ASM = \
+	runtime1.s \
+
+OBJ = $(SRC:.c=.o) $(SRC_ASM:.s=.o)
+LIB =
+PROG = py
+
+$(PROG): $(OBJ)
+	$(CC) -o $@ $(OBJ) $(LIB) $(LDFLAGS)
+
+runtime.o: runtime.c
+	$(CC) $(CFLAGS) -O3 -c -o $@ $<
+
+bc.o: bc.c
+	$(CC) $(CFLAGS) -O3 -c -o $@ $<
+
+parse.o: grammar.h
+compile.o: grammar.h
+emitcpy.o: emit.h
+emitbc.o: emit.h
+emitx64.o: emit.h
+emitx64v2.o: emit.h
+emitthumb.o: emit.h
+
+clean: # remove the objects and the linked program; -f so a repeated or partial clean does not fail
+	/bin/rm -f $(OBJ) $(PROG)
diff --git a/py/asmthumb.c b/py/asmthumb.c
new file mode 100644
index 0000000..ea7547d
--- /dev/null
+++ b/py/asmthumb.c
@@ -0,0 +1,421 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+#include "misc.h"
+#include "machine.h"
+#include "asmthumb.h"
+
+#define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0) // bits 8..31 of x are clear
+#define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0) // bits 16..31 of x are clear
+#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80)) // bits 7..31 all equal; whole expansion parenthesised so it composes with !, && etc.
+#define SIGNED_FIT9(x) ((((x) & 0xffffff00) == 0) || (((x) & 0xffffff00) == 0xffffff00)) // bits 8..31 all equal
+#define SIGNED_FIT12(x) ((((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800)) // bits 11..31 all equal
+
+struct _asm_thumb_t {
+    int pass; // pass currently running (ASM_THUMB_PASS_1..3); set by asm_thumb_start_pass
+    uint code_offset; // bytes emitted so far in the current pass
+    uint code_size; // total code size in bytes, recorded at the end of pass 2
+    byte *code_base; // output buffer, allocated at the end of pass 2
+    byte dummy_data[8]; // scratch target for emitted bytes in passes before pass 3
+
+    int next_label; // next id returned by asm_thumb_label_new; reset to 1 each pass
+    int max_num_labels; // number of entries in label_offsets
+    int *label_offsets; // code offset of each label; -1 while unassigned
+    int num_locals; // local count given to asm_thumb_entry
+    uint push_reglist; // register bitmap pushed by asm_thumb_entry, popped by asm_thumb_exit
+    uint stack_adjust; // extra words subtracted from SP by asm_thumb_entry
+};
+
+asm_thumb_t *asm_thumb_new() {
+    // Create an assembler with no code buffer and no label table; the passes allocate those.
+    asm_thumb_t *as = m_new(asm_thumb_t, 1);
+
+    // Zero every field up front: nothing visible guarantees that m_new clears
+    // memory, and next_label, max_num_labels, push_reglist and stack_adjust
+    // are not assigned individually below.
+    memset(as, 0, sizeof(*as));
+    as->code_base = NULL;
+    as->label_offsets = NULL;
+
+    return as;
+}
+
+// Destroy an assembler object.
+//   as        - assembler to destroy; must not be used afterwards
+//   free_code - if true the generated code buffer is freed as well; if false
+//               the buffer is left allocated (presumably for a caller that
+//               obtained it via asm_thumb_get_code -- confirm against callers)
+void asm_thumb_free(asm_thumb_t *as, bool free_code) {
+    if (free_code) {
+        m_free(as->code_base);
+    }
+    // the label table is only reachable through the assembler, so it is always
+    // released here (otherwise it leaks); the NULL check covers an assembler
+    // that never finished pass 1
+    if (as->label_offsets != NULL) {
+        m_free(as->label_offsets);
+        as->label_offsets = NULL;
+    }
+    m_free(as);
+}
+
+void asm_thumb_start_pass(asm_thumb_t *as, int pass) {
+    // begin a pass: rewind the write offset and restart label numbering at 1
+    as->next_label = 1;
+    as->code_offset = 0;
+    as->pass = pass;
+    if (pass == ASM_THUMB_PASS_2) {
+        // mark every label as unassigned (all-ones bytes read back as -1)
+        memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
+    } else if (pass == ASM_THUMB_PASS_1) {
+        as->max_num_labels = 0;
+    }
+}
+
+// Finish the current pass.
+//   after pass 1: size and allocate the label-offset table
+//   after pass 2: record the final code size and allocate the code buffer
+//   after pass 3: nothing to do
+void asm_thumb_end_pass(asm_thumb_t *as) {
+    if (as->pass == ASM_THUMB_PASS_1) {
+        // calculate number of labels needed
+        if (as->next_label > as->max_num_labels) {
+            as->max_num_labels = as->next_label;
+        }
+        // drop the table left over from an earlier run of the passes, so
+        // that reusing one assembler for several functions does not leak it
+        if (as->label_offsets != NULL) {
+            m_free(as->label_offsets);
+        }
+        as->label_offsets = m_new(int, as->max_num_labels);
+    } else if (as->pass == ASM_THUMB_PASS_2) {
+        // calculate size of code in bytes
+        as->code_size = as->code_offset;
+        // code_base is deliberately not freed here: asm_thumb_free() can be told
+        // to leave it allocated, so an earlier buffer may still be in use
+        as->code_base = m_new(byte, as->code_size);
+        printf("code_size: %u\n", as->code_size);
+    }
+}
+
+// all functions must go through this one to emit bytes: it reserves
+// num_bytes_to_write bytes at the current offset and returns where to store them
+static byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int num_bytes_to_write) {
+    byte *dest = as->dummy_data; // passes before 3 only measure the code; bytes land in scratch
+    if (as->pass >= ASM_THUMB_PASS_3) {
+        // final pass: store into the real buffer, which must have room left
+        assert(as->code_offset + num_bytes_to_write <= as->code_size);
+        dest = as->code_base + as->code_offset;
+    }
+
+    as->code_offset += num_bytes_to_write;
+    return dest;
+}
+
+uint asm_thumb_get_code_size(asm_thumb_t *as) {
+    return as->code_size; // byte count recorded at the end of pass 2 (0 on a fresh assembler)
+}
+
+void *asm_thumb_get_code(asm_thumb_t *as) {
+    // returns the code buffer address with the low bit set, to indicate that it's thumb code
+    return (void *)(((machine_uint_t)as->code_base) | 1);
+}
+
+/*
+static void asm_thumb_write_byte_1(asm_thumb_t *as, byte b1) {
+    byte *c = asm_thumb_get_cur_to_write_bytes(as, 1);
+    c[0] = b1;
+}
+*/
+
+static void asm_thumb_write_op16(asm_thumb_t *as, uint op) {
+    // emit one 16-bit instruction
+    byte *out = asm_thumb_get_cur_to_write_bytes(as, 2);
+    *out++ = op; // little endian: low byte first
+    *out = op >> 8;
+}
+
+static void asm_thumb_write_op32(asm_thumb_t *as, uint op1, uint op2) {
+    // 32-bit instruction: halfword op1 then halfword op2, each stored low byte first
+    byte *out = asm_thumb_get_cur_to_write_bytes(as, 4);
+    *out++ = op1;
+    *out++ = op1 >> 8;
+    *out++ = op2;
+    *out = op2 >> 8;
+}
+
+/*
+#define IMM32_L0(x) ((x) & 0xff)
+#define IMM32_L1(x) (((x) >> 8) & 0xff)
+#define IMM32_L2(x) (((x) >> 16) & 0xff)
+#define IMM32_L3(x) (((x) >> 24) & 0xff)
+
+static void asm_thumb_write_word32(asm_thumb_t *as, int w32) {
+    byte *c = asm_thumb_get_cur_to_write_bytes(as, 4);
+    c[0] = IMM32_L0(w32);
+    c[1] = IMM32_L1(w32);
+    c[2] = IMM32_L2(w32);
+    c[3] = IMM32_L3(w32);
+}
+*/
+
+// rlolist is a bit map indicating desired lo-registers (bit n selects register rn)
+#define OP_PUSH_RLIST(rlolist)      (0xb400 | (rlolist))
+#define OP_PUSH_RLIST_LR(rlolist)   (0xb400 | 0x0100 | (rlolist))
+#define OP_POP_RLIST(rlolist)       (0xbc00 | (rlolist))
+#define OP_POP_RLIST_PC(rlolist)    (0xbc00 | 0x0100 | (rlolist))
+// num_words is OR-ed in unmasked; bit 7 is what tells SUB from ADD, so it must stay below 128
+#define OP_ADD_SP(num_words) (0xb000 | (num_words))
+#define OP_SUB_SP(num_words) (0xb080 | (num_words))
+
+// Emit the function prologue and remember its shape for asm_thumb_exit().
+//
+// The prologue pushes r4-r7 and LR, plus r1 (reglist 0xf2) or r1-r3 (reglist
+// 0xfe).  r0 is left out so that the matching pop in asm_thumb_exit() does
+// not overwrite the return value.  The r1..r3 slots provide room for up to
+// three locals; any further locals get space from an explicit "sub sp".
+//
+// Alignment: 0xf2 plus LR is 6 words, 0xfe plus LR is 8 words, and the extra
+// adjustment is rounded up to an even number of words, so SP always moves by
+// a multiple of 8 bytes.
+//
+//   num_locals - number of word-sized locals wanted; negative is treated as 0
+void asm_thumb_entry(asm_thumb_t *as, int num_locals) {
+    uint reglist;
+    uint stack_adjust = 0;
+    if (num_locals < 0) {
+        num_locals = 0;
+    }
+
+    if (num_locals <= 1) {
+        // 0 or 1 locals: the single r1 slot is enough
+        reglist = 0xf2;
+    } else {
+        // 2 or more locals: r1-r3 give three slots
+        reglist = 0xfe;
+        if (num_locals > 3) {
+            // slots still missing, rounded up to an even count
+            stack_adjust = ((num_locals - 3) + 1) & (~1);
+        }
+    }
+
+    // OP_SUB_SP and OP_ADD_SP OR the word count into the opcode unmasked; 128
+    // or more would collide with opcode bits and emit the wrong instruction
+    assert(stack_adjust < 128);
+
+    asm_thumb_write_op16(as, OP_PUSH_RLIST_LR(reglist));
+    if (stack_adjust > 0) {
+        asm_thumb_write_op16(as, OP_SUB_SP(stack_adjust));
+    }
+    as->push_reglist = reglist;
+    as->stack_adjust = stack_adjust;
+    as->num_locals = num_locals;
+}
+
+void asm_thumb_exit(asm_thumb_t *as) {
+    // epilogue: give back the extra stack words (if any), then pop the pushed registers and PC
+    const uint adjust = as->stack_adjust;
+    if (adjust > 0) { asm_thumb_write_op16(as, OP_ADD_SP(adjust)); }
+    asm_thumb_write_op16(as, OP_POP_RLIST_PC(as->push_reglist));
+}
+
+int asm_thumb_label_new(asm_thumb_t *as) {
+    return as->next_label++; // ids are handed out sequentially; asm_thumb_start_pass resets the counter to 1
+}
+
+// Bind label to the current code offset.
+void asm_thumb_label_assign(asm_thumb_t *as, int label) {
+    if (as->pass <= ASM_THUMB_PASS_1) {
+        return; // pass 1 only counts labels; nothing is recorded yet
+    }
+    assert(label < as->max_num_labels);
+    if (as->pass == ASM_THUMB_PASS_2) {
+        assert(as->label_offsets[label] == -1); // a label may be assigned only once
+        as->label_offsets[label] = as->code_offset;
+    } else if (as->pass == ASM_THUMB_PASS_3) {
+        // ensure label offset has not changed from PASS_2 to PASS_3
+        assert(as->label_offsets[label] == as->code_offset);
+    }
+}
+
+// movs rlo_dest, #i8 -- the i8 value will be zero extended into the r32 register!
+void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8) {
+    assert(rlo_dest < REG_R8);
+    assert(UNSIGNED_FIT8(i8)); // a negative or wider value would overwrite the opcode and register bits
+    asm_thumb_write_op16(as, 0x2000 | (rlo_dest << 8) | (i8 & 0xff));
+}
+
+// Emit movw (low half) or movt (high half) of reg_dest with a 16-bit immediate.
+// if loading lo half, the i16 value will be zero extended into the r32 register!
+void asm_thumb_mov_i16_to_reg(asm_thumb_t *as, int i16, uint reg_dest, bool load_hi_half) {
+    assert(reg_dest < REG_R15);
+    uint op = load_hi_half ? 0xf2c0 : 0xf240; // movt : movw
+    // the immediate is split into four fields that are scattered over the two
+    // halfwords: bits 15..12, bit 11, bits 10..8 and bits 7..0
+    uint imm_hi4 = (i16 >> 12) & 0xf;
+    uint imm_bit11 = (i16 >> 1) & 0x0400;
+    uint imm_mid3 = (i16 << 4) & 0x7000;
+    uint imm_lo8 = i16 & 0xff;
+    asm_thumb_write_op32(as, op | imm_bit11 | imm_hi4, imm_mid3 | (reg_dest << 8) | imm_lo8);
+}
+
+void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32) {
+    // movw, movt does it in 8 bytes
+    // ldr [pc, #], dw does it in 6 bytes, but we might not reach to end of code for dw
+    const machine_uint_t upper = i32 >> 16;
+    asm_thumb_mov_i16_to_reg(as, i32, reg_dest, false); // low 16 bits
+    asm_thumb_mov_i16_to_reg(as, upper, reg_dest, true); // high 16 bits
+}
+
+void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) {
+    if (!UNSIGNED_FIT16(i32)) {
+        asm_thumb_mov_reg_i32(as, reg_dest, i32); // needs both halves: two 32-bit instructions
+    } else if (reg_dest >= 8 || !UNSIGNED_FIT8(i32)) {
+        asm_thumb_mov_i16_to_reg(as, i32, reg_dest, false); // one 32-bit instruction
+    } else {
+        asm_thumb_mov_reg_i8(as, reg_dest, i32); // one 16-bit instruction, low registers only
+    }
+}
+
+// Emit "mov reg_dest, reg_src".  Either register may be r8 or above.
+//
+// Opcode layout used here (base 0x4600):
+//   bits 3..5  low three bits of the source register
+//   bit  6     set when the source is r8 or above
+//   bits 0..2  low three bits of the destination register
+//   bit  7     set when the destination is r8 or above
+void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) {
+    uint src_bits = (reg_src < 8) ? (reg_src << 3) : (0x40 | ((reg_src - 8) << 3));
+    uint dest_bits = (reg_dest < 8) ? reg_dest : (0x80 | (reg_dest - 8));
+
+    // the two groups of bits do not overlap, so the order of the ORs is irrelevant
+    asm_thumb_write_op16(as, 0x4600 | src_bits | dest_bits);
+}
+
+#define OP_STR_TO_SP_OFFSET(rlo_dest, word_offset) (0x9000 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff)) // word_offset is truncated to 8 bits
+#define OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset) (0x9800 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff)) // word_offset is truncated to 8 bits
+
+void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num, uint rlo_src) {
+    assert(rlo_src < REG_R8);
+    const int slot = as->num_locals - local_num - 1; // sp-relative word index; local 0 is the highest slot
+    assert(slot >= 0 || as->pass < ASM_THUMB_PASS_3);
+    asm_thumb_write_op16(as, OP_STR_TO_SP_OFFSET(rlo_src, slot)); // store rlo_src into the local's slot
+}
+
+void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) {
+    assert(rlo_dest < REG_R8);
+    const int slot = as->num_locals - local_num - 1; // sp-relative word index; local 0 is the highest slot
+    assert(slot >= 0 || as->pass < ASM_THUMB_PASS_3);
+    asm_thumb_write_op16(as, OP_LDR_FROM_SP_OFFSET(rlo_dest, slot)); // load the local's slot into rlo_dest
+}
+
+void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num) {
+    assert(0); // not implemented: aborts whenever asserts are enabled
+    // see format 12, load address
+    asm_thumb_write_op16(as, 0x0000); // placeholder opcode; reg_dest and local_num are not used yet
+}
+
+#define OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b) (0x1800 | ((rlo_src_b) << 6) | ((rlo_src_a) << 3) | (rlo_dest))
+// emit an add of rlo_src_a and rlo_src_b into rlo_dest; the register numbers sit 3 bits apart and are not masked or range-checked
+void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b) {
+    asm_thumb_write_op16(as, OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b));
+}
+
+#define OP_CMP_REG_REG(rlo_a, rlo_b) (0x4280 | ((rlo_b) << 3) | (rlo_a))
+// emit a compare of rlo_a with rlo_b; the register numbers sit 3 bits apart and are not masked or range-checked
+void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b) {
+    asm_thumb_write_op16(as, OP_CMP_REG_REG(rlo_a, rlo_b));
+}
+
+void asm_thumb_ite_ge(asm_thumb_t *as) {
+    asm_thumb_write_op16(as, 0xbfac); // fixed 16-bit opcode; going by the function name, an "ite ge" prefix
+}
+
+#define OP_B(byte_offset) (0xe000 | (((byte_offset) >> 1) & 0x07ff)) // 16-bit form: carries offset bits 1..11
+// this could be wrong, because it should have a range of +/- 16MiB...
+#define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff)) // 32-bit form, first halfword: offset bits 12..22
+#define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff)) // 32-bit form, second halfword: offset bits 1..11
+
+void asm_thumb_b_label(asm_thumb_t *as, int label) {
+    // Emit an unconditional branch to label.  Nothing is emitted in pass 1.
+    if (as->pass > ASM_THUMB_PASS_1) {
+        // same bounds check as asm_thumb_label_assign: an out-of-range label
+        // must not index past the end of label_offsets
+        assert(label < as->max_num_labels);
+        int dest = as->label_offsets[label];
+        int rel = dest - as->code_offset;
+        rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction
+        if (dest >= 0 && rel <= -4 && (SIGNED_FIT12(rel))) {
+            // backwards jump to an already-assigned label that is in reach of
+            // the 16-bit form, so its size is known as soon as it is first seen
+            asm_thumb_write_op16(as, OP_B(rel));
+        } else {
+            // forwards jump (label not assigned yet, dest == -1) or out of
+            // reach of the 16-bit form: assume it's large
+            asm_thumb_write_op32(as, OP_BW_HI(rel), OP_BW_LO(rel));
+        }
+    }
+}
+
+#define OP_CMP_REG_IMM(rlo, i8) (0x2800 | ((rlo) << 8) | (i8))
+// 32-bit beq: offset bit 20 (sign) and bits 17..12 go in the first halfword; bit 18 (to bit 13), bit 19 (to bit 11) and bits 11..1 go in the second
+#define OP_BEQ(byte_offset) (0xd000 | (((byte_offset) >> 1) & 0x00ff))
+#define OP_BEQW_HI(byte_offset) (0xf000 | (((byte_offset) >> 10) & 0x0400) | (((byte_offset) >> 12) & 0x003f))
+#define OP_BEQW_LO(byte_offset) (0x8000 | (((byte_offset) >> 5) & 0x2000) | (((byte_offset) >> 8) & 0x0800) | (((byte_offset) >> 1) & 0x07ff))
+
+// Emit a compare of rlo with 0 followed by a branch-if-equal to label.  The
+// compare is emitted in every pass; the branch is skipped in pass 1.
+void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label) {
+    assert(rlo < REG_R8);
+
+    // compare reg with 0
+    asm_thumb_write_op16(as, OP_CMP_REG_IMM(rlo, 0));
+
+    // branch if equal
+    if (as->pass > ASM_THUMB_PASS_1) {
+        // same bounds check as asm_thumb_label_assign
+        assert(label < as->max_num_labels);
+        int dest = as->label_offsets[label];
+        int rel = dest - as->code_offset;
+        rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction
+        if (dest >= 0 && rel <= -4 && (SIGNED_FIT9(rel))) {
+            // backwards jump to an already-assigned label within reach of the
+            // 16-bit beq, whose byte offset is a 9-bit signed value
+            asm_thumb_write_op16(as, OP_BEQ(rel));
+        } else {
+            // forwards jump (label not assigned yet, dest == -1) or out of
+            // reach of the 16-bit form: assume it's large
+            asm_thumb_write_op32(as, OP_BEQW_HI(rel), OP_BEQW_LO(rel));
+        }
+    }
+}
+
+#define OP_BLX(reg) (0x4780 | ((reg) << 3))
+#define OP_SVC(arg) (0xdf00 | (arg))
+#define OP_LDR_FROM_BASE_OFFSET(rlo_dest, rlo_base, word_offset) (0x6800 | (((word_offset) << 6) & 0x07c0) | ((rlo_base) << 3) | (rlo_dest))
+
+// Emit an indirect call to the function numbered fun_id: load the function
+// pointer from word fun_id of the table addressed by r7 into reg_temp, then
+// blx reg_temp.  fun_ptr is only used by the disabled immediate variant.
+// TODO make this use less bytes
+void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp) {
+    if (0) {
+        // load ptr to function into register using immediate, then branch
+        // not relocatable
+        asm_thumb_mov_reg_i32(as, reg_temp, (machine_uint_t)fun_ptr);
+        asm_thumb_write_op16(as, OP_BLX(reg_temp));
+    } else if (1) {
+        // the ldr encoding has a 3-bit destination and a 5-bit word offset
+        // (see the 0x07c0 mask); refuse values that would be silently
+        // truncated or would spill into the neighbouring fields
+        assert(reg_temp < REG_R8);
+        assert(fun_id < 32);
+        asm_thumb_write_op16(as, OP_LDR_FROM_BASE_OFFSET(reg_temp, REG_R7, fun_id));
+        asm_thumb_write_op16(as, OP_BLX(reg_temp));
+    } else {
+        // use SVC
+        asm_thumb_write_op16(as, OP_SVC(fun_id));
+    }
+}
diff --git a/py/asmthumb.h b/py/asmthumb.h
new file mode 100644
index 0000000..d3ffb9a
--- /dev/null
+++ b/py/asmthumb.h
@@ -0,0 +1,60 @@
+#ifndef ASMTHUMB_H // guard: a second inclusion would otherwise repeat the typedef below
+#define ASMTHUMB_H // note: bool, uint and machine_uint_t must be declared before this header is included
+#define ASM_THUMB_PASS_1 (1)
+#define ASM_THUMB_PASS_2 (2)
+#define ASM_THUMB_PASS_3 (3)
+
+#define REG_R0  (0)
+#define REG_R1  (1)
+#define REG_R2  (2)
+#define REG_R3  (3)
+#define REG_R4  (4)
+#define REG_R5  (5)
+#define REG_R6  (6)
+#define REG_R7  (7)
+#define REG_R8  (8)
+#define REG_R9  (9)
+#define REG_R10 (10)
+#define REG_R11 (11)
+#define REG_R12 (12)
+#define REG_R13 (13)
+#define REG_R14 (14)
+#define REG_R15 (15)
+#define REG_LR  (REG_R14)
+
+#define REG_RET REG_R0
+#define REG_ARG_1 REG_R0
+#define REG_ARG_2 REG_R1
+#define REG_ARG_3 REG_R2
+#define REG_ARG_4 REG_R3
+
+typedef struct _asm_thumb_t asm_thumb_t; // opaque; the struct is defined in asmthumb.c
+
+asm_thumb_t *asm_thumb_new(void);
+void asm_thumb_free(asm_thumb_t *as, bool free_code);
+void asm_thumb_start_pass(asm_thumb_t *as, int pass);
+void asm_thumb_end_pass(asm_thumb_t *as);
+uint asm_thumb_get_code_size(asm_thumb_t *as);
+void *asm_thumb_get_code(asm_thumb_t *as);
+
+void asm_thumb_entry(asm_thumb_t *as, int num_locals);
+void asm_thumb_exit(asm_thumb_t *as);
+
+int asm_thumb_label_new(asm_thumb_t *as);
+void asm_thumb_label_assign(asm_thumb_t *as, int label);
+
+// argument order follows ARM, in general dest is first
+void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8_src);
+void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32_src);
+void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32_src);
+void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src);
+void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num_dest, uint rlo_src);
+void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num);
+void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num);
+void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b);
+void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b);
+void asm_thumb_ite_ge(asm_thumb_t *as);
+void asm_thumb_b_label(asm_thumb_t *as, int label);
+void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label);
+void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp);
+#endif // ASMTHUMB_H
diff --git a/py/asmx64.c b/py/asmx64.c
new file mode 100644
index 0000000..59c8113
--- /dev/null
+++ b/py/asmx64.c
@@ -0,0 +1,621 @@
+#include <stdio.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include "misc.h"
+#include "asmx64.h"
+
+/* all offsets are measured in multiples of 8 bytes */
+#define WORD_SIZE                (8)
+
+#define OPCODE_NOP               (0x90)
+#define OPCODE_PUSH_R64          (0x50)
+#define OPCODE_PUSH_I64          (0x68)
+#define OPCODE_PUSH_M64          (0xff) /* /6 */
+#define OPCODE_POP_R64           (0x58)
+#define OPCODE_RET               (0xc3)
+#define OPCODE_MOV_I8_TO_R8      (0xb0) /* +rb */
+#define OPCODE_MOV_I64_TO_R64    (0xb8)
+#define OPCODE_MOV_I32_TO_RM32   (0xc7)
+#define OPCODE_MOV_R64_TO_RM64   (0x89)
+#define OPCODE_MOV_RM64_TO_R64   (0x8b)
+#define OPCODE_LEA_MEM_TO_R64    (0x8d) /* /r */
+#define OPCODE_XOR_R64_TO_RM64   (0x31) /* /r */
+#define OPCODE_ADD_R64_TO_RM64   (0x01)
+#define OPCODE_ADD_I32_TO_RM32   (0x81) /* /0 */
+#define OPCODE_ADD_I8_TO_RM32    (0x83) /* /0 */
+#define OPCODE_SUB_R64_FROM_RM64 (0x29)
+#define OPCODE_SUB_I32_FROM_RM64 (0x81) /* /5 */
+#define OPCODE_SUB_I8_FROM_RM64  (0x83) /* /5 */
+#define OPCODE_SHL_RM32_BY_I8    (0xc1) /* /4 */
+#define OPCODE_SHR_RM32_BY_I8    (0xc1) /* /5 */
+#define OPCODE_SAR_RM32_BY_I8    (0xc1) /* /7 */
+#define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */
+#define OPCODE_CMP_I8_WITH_RM32  (0x83) /* /7 */
+#define OPCODE_CMP_R64_WITH_RM64 (0x39)
+#define OPCODE_CMP_RM32_WITH_R32 (0x3b)
+#define OPCODE_TEST_R8_WITH_RM8  (0x84) /* /r */
+#define OPCODE_JMP_REL8          (0xeb)
+#define OPCODE_JMP_REL32         (0xe9)
+#define OPCODE_JCC_REL8          (0x70) /* | jcc type */
+#define OPCODE_JCC_REL32_A       (0x0f)
+#define OPCODE_JCC_REL32_B       (0x80) /* | jcc type */
+#define OPCODE_SETCC_RM8_A       (0x0f)
+#define OPCODE_SETCC_RM8_B       (0x90) /* | jcc type, /0 */
+#define OPCODE_CALL_REL32        (0xe8)
+#define OPCODE_CALL_RM32         (0xff) /* /2 */
+#define OPCODE_LEAVE             (0xc9)
+
+#define MODRM_R64(x)    ((x) << 3)
+#define MODRM_RM_DISP0  (0x00)
+#define MODRM_RM_DISP8  (0x40)
+#define MODRM_RM_DISP32 (0x80)
+#define MODRM_RM_REG    (0xc0)
+#define MODRM_RM_R64(x) (x)
+
+#define REX_PREFIX  (0x40)
+#define REX_W       (0x08)  // width
+#define REX_R       (0x04)  // register
+#define REX_X       (0x02)  // index
+#define REX_B       (0x01)  // base
+
+#define IMM32_L0(x) ((x) & 0xff)
+#define IMM32_L1(x) (((x) >> 8) & 0xff)
+#define IMM32_L2(x) (((x) >> 16) & 0xff)
+#define IMM32_L3(x) (((x) >> 24) & 0xff)
+#define IMM64_L4(x) (((x) >> 32) & 0xff)
+#define IMM64_L5(x) (((x) >> 40) & 0xff)
+#define IMM64_L6(x) (((x) >> 48) & 0xff)
+#define IMM64_L7(x) (((x) >> 56) & 0xff)
+
+#define UNSIGNED_FIT8(x) (((x) & 0xffffffffffffff00) == 0) // no bit above bit 7 is set
+#define UNSIGNED_FIT32(x) (((x) & 0xffffffff00000000) == 0) // no bit above bit 31 is set
+#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80)) // bits 7..31 all equal; whole expansion parenthesised so it composes with !, && etc.
+
+struct _asm_x64_t {
+    int pass; // pass currently running (ASM_X64_PASS_1..3); set by asm_x64_start_pass
+    uint code_offset; // bytes emitted so far in the current pass
+    uint code_size; // total code size in bytes, recorded at the end of pass 2
+    byte *code_base; // output buffer, allocated at the end of pass 2
+    byte dummy_data[8]; // scratch target for emitted bytes in passes before pass 3
+
+    int next_label; // next id returned by asm_x64_label_new; reset to 1 each pass
+    int max_num_labels; // number of entries in label_offsets
+    int *label_offsets; // one int per label; every entry is set to -1 at the start of pass 2
+};
+
+// for allocating memory, see src/v8/src/platform-linux.cc
+// maps anonymous private memory; on failure returns NULL and stores 0 in *alloc_size
+void *alloc_mem(uint req_size, uint *alloc_size, bool is_exec) {
+    req_size = (req_size + 0xfff) & (~0xfff); // round up to a multiple of 4096 bytes
+    int prot = PROT_READ | PROT_WRITE | (is_exec ? PROT_EXEC : 0);
+    void *ptr = mmap(NULL, req_size, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    bool failed = (ptr == MAP_FAILED);
+    assert(!failed); // debug builds stop here; with NDEBUG the failure is reported instead of returning MAP_FAILED
+    *alloc_size = failed ? 0 : req_size;
+    return failed ? NULL : ptr;
+}
+
+asm_x64_t* asm_x64_new() {
+    // Create an assembler with no code buffer and no label table; the passes allocate those.
+    asm_x64_t *as = m_new(asm_x64_t, 1);
+
+    // Zero every field up front: nothing visible guarantees that m_new clears
+    // memory, and next_label and max_num_labels are not assigned individually.
+    memset(as, 0, sizeof(*as));
+    as->code_base = NULL;
+    as->label_offsets = NULL;
+
+    return as;
+}
+
+// Destroy an assembler object.
+//   as        - assembler to destroy; must not be used afterwards
+//   free_code - if true the generated code buffer is freed as well; if false
+//               the buffer is left allocated (presumably for a caller that
+//               obtained it via asm_x64_get_code -- confirm against callers)
+void asm_x64_free(asm_x64_t* as, bool free_code) {
+    if (free_code) {
+        m_free(as->code_base);
+    }
+    // the label table is only reachable through the assembler, so it is always
+    // released here (otherwise it leaks); the NULL check covers an assembler
+    // that never finished pass 1
+    if (as->label_offsets != NULL) {
+        m_free(as->label_offsets);
+        as->label_offsets = NULL;
+    }
+    m_free(as);
+}
+
+void asm_x64_start_pass(asm_x64_t *as, int pass) {
+    // begin a pass: rewind the write offset and restart label numbering at 1
+    as->next_label = 1;
+    as->code_offset = 0;
+    as->pass = pass;
+    if (pass == ASM_X64_PASS_2) {
+        // mark every label as unassigned (all-ones bytes read back as -1)
+        memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
+    } else if (pass == ASM_X64_PASS_1) {
+        as->max_num_labels = 0;
+    }
+}
+
+// Finish the current pass.
+//   after pass 1: size and allocate the label-offset table
+//   after pass 2: record the final code size and allocate the code buffer
+//   after pass 3: nothing to do
+void asm_x64_end_pass(asm_x64_t *as) {
+    if (as->pass == ASM_X64_PASS_1) {
+        // calculate number of labels needed
+        if (as->next_label > as->max_num_labels) {
+            as->max_num_labels = as->next_label;
+        }
+        // drop the table left over from an earlier run of the passes, so
+        // that reusing one assembler for several functions does not leak it
+        if (as->label_offsets != NULL) {
+            m_free(as->label_offsets);
+        }
+        as->label_offsets = m_new(int, as->max_num_labels);
+    } else if (as->pass == ASM_X64_PASS_2) {
+        // calculate size of code in bytes
+        as->code_size = as->code_offset;
+        // code_base is deliberately not freed here: asm_x64_free() can be told
+        // to leave it allocated, so an earlier buffer may still be in use
+        as->code_base = m_new(byte, as->code_size);
+        printf("code_size: %u\n", as->code_size);
+    }
+}
+
+// all functions must go through this one to emit bytes: it reserves
+// num_bytes_to_write bytes at the current offset and returns where to store them
+static byte* asm_x64_get_cur_to_write_bytes(asm_x64_t* as, int num_bytes_to_write) {
+    byte *dest = as->dummy_data; // passes before 3 only measure the code; bytes land in scratch
+    if (as->pass >= ASM_X64_PASS_3) {
+        // final pass: store into the real buffer, which must have room left
+        assert(as->code_offset + num_bytes_to_write <= as->code_size);
+        dest = as->code_base + as->code_offset;
+    }
+
+    as->code_offset += num_bytes_to_write;
+    return dest;
+}
+
+uint asm_x64_get_code_size(asm_x64_t* as) {
+    return as->code_size; // byte count recorded at the end of pass 2 (0 on a fresh assembler)
+}
+
+void* asm_x64_get_code(asm_x64_t* as) {
+    return as->code_base; // buffer allocated at the end of pass 2; NULL on a fresh assembler
+}
+
+static void asm_x64_write_byte_1(asm_x64_t* as, byte b1) {
+    // emit a single byte
+    *asm_x64_get_cur_to_write_bytes(as, 1) = b1;
+}
+
+static void asm_x64_write_byte_2(asm_x64_t* as, byte b1, byte b2) {
+    byte *out = asm_x64_get_cur_to_write_bytes(as, 2); // reserve room for both bytes at once
+    *out++ = b1;
+    *out = b2;
+}
+
+static void asm_x64_write_byte_3(asm_x64_t* as, byte b1, byte b2, byte b3) {
+    byte *out = asm_x64_get_cur_to_write_bytes(as, 3); // reserve room for all three bytes at once
+    *out++ = b1;
+    *out++ = b2;
+    *out = b3;
+}
+
+static void asm_x64_write_word32(asm_x64_t* as, int w32) {
+    // emit the four bytes of w32, least significant first
+    byte *out = asm_x64_get_cur_to_write_bytes(as, 4);
+    for (int shift = 0; shift < 32; shift += 8) {
+        *out++ = (w32 >> shift) & 0xff;
+    }
+}
+
+// Emit the eight bytes of w64, least significant byte first (little endian).
+// Like every emitter the bytes go through asm_x64_get_cur_to_write_bytes, so
+// before pass 3 they only advance the offset and land in the scratch buffer.
+static void asm_x64_write_word64(asm_x64_t* as, int64_t w64) {
+    byte *out = asm_x64_get_cur_to_write_bytes(as, 8);
+
+    for (int i = 0; i < 8; ++i) {
+        // byte i holds bits 8*i .. 8*i+7
+        out[i] = (w64 >> (8 * i)) & 0xff;
+    }
+}
+
+/* unused
+static void asm_x64_write_word32_to(asm_x64_t* as, int offset, int w32) {
+    byte* c;
+    assert(offset + 4 <= as->code_size);
+    c = as->code_base + offset;
+    c[0] = IMM32_L0(w32);
+    c[1] = IMM32_L1(w32);
+    c[2] = IMM32_L2(w32);
+    c[3] = IMM32_L3(w32);
+}
+*/
+
+static void asm_x64_write_r64_disp(asm_x64_t* as, int r64, int disp_r64, int disp_offset) {
+    assert(disp_r64 != REG_RSP); // a base register of RSP is not handled by this helper
+    const int reg_bits = MODRM_R64(r64) | MODRM_RM_R64(disp_r64); // ModRM fields shared by all three forms
+    if (disp_offset == 0 && disp_r64 != REG_RBP) {
+        asm_x64_write_byte_1(as, reg_bits | MODRM_RM_DISP0);
+    } else if (SIGNED_FIT8(disp_offset)) {
+        asm_x64_write_byte_2(as, reg_bits | MODRM_RM_DISP8, IMM32_L0(disp_offset));
+    } else {
+        asm_x64_write_byte_1(as, reg_bits | MODRM_RM_DISP32);
+        asm_x64_write_word32(as, disp_offset);
+    }
+}
+
+void asm_x64_nop(asm_x64_t* as) {
+    // emits the single byte OPCODE_NOP
+    asm_x64_write_byte_1(as, OPCODE_NOP);
+}
+
+void asm_x64_push_r64(asm_x64_t* as, int src_r64) {
+    // one-byte push: the register number is OR-ed into the opcode byte
+    asm_x64_write_byte_1(as, OPCODE_PUSH_R64 | src_r64);
+}
+
+void asm_x64_push_i32(asm_x64_t* as, int src_i32) {
+    // opcode byte followed by the 32-bit immediate, which will be sign extended to 64 bits
+    asm_x64_write_byte_1(as, OPCODE_PUSH_I64);
+    asm_x64_write_word32(as, src_i32);
+}
+
+void asm_x64_push_disp(asm_x64_t* as, int src_r64, int src_offset) {
+    asm_x64_write_byte_1(as, OPCODE_PUSH_M64); // push of a memory operand
+    asm_x64_write_r64_disp(as, 6, src_r64, src_offset); // 6 is the /6 opcode extension in the reg field; operand is [src_r64 + src_offset]
+}
+
+void asm_x64_pop_r64(asm_x64_t* as, int dest_r64) {
+    // one-byte pop: the register number is OR-ed into the opcode byte
+    asm_x64_write_byte_1(as, OPCODE_POP_R64 | dest_r64);
+}
+
+static void asm_x64_ret(asm_x64_t* as) {
+    // emits the single byte OPCODE_RET
+    asm_x64_write_byte_1(as, OPCODE_RET);
+}
+
+void asm_x64_mov_r32_to_r32(asm_x64_t* as, int src_r32, int dest_r32) {
+    const int modrm = MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32); // register-to-register form
+    asm_x64_write_byte_2(as, OPCODE_MOV_R64_TO_RM64, modrm); // no REX prefix: defaults to 32 bit operation
+}
+
+void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64) {
+    const int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64); // register-to-register form
+    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_MOV_R64_TO_RM64, modrm); // REX prefix for 64 bit operation
+}
+
+void asm_x64_mov_r64_to_disp(asm_x64_t* as, int src_r64, int dest_r64, int dest_disp) {
+    // store src_r64 to the memory operand [dest_r64 + dest_disp]; REX prefix for 64 bit operation
+    asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_MOV_R64_TO_RM64);
+    asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
+}
+
+void asm_x64_mov_disp_to_r64(asm_x64_t* as, int src_r64, int src_disp, int dest_r64) {
+    // load dest_r64 from the memory operand [src_r64 + src_disp]; REX prefix for 64 bit operation
+    asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_MOV_RM64_TO_R64);
+    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
+}
+
+void asm_x64_lea_disp_to_r64(asm_x64_t* as, int src_r64, int src_disp, int dest_r64) {
+    // lea with the memory operand [src_r64 + src_disp] and destination dest_r64; REX prefix for 64 bit operation
+    asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_LEA_MEM_TO_R64);
+    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
+}
+
+void asm_x64_mov_i8_to_r8(asm_x64_t *as, int src_i8, int dest_r64) {
+    asm_x64_write_byte_2(as, OPCODE_MOV_I8_TO_R8 | dest_r64, src_i8); // NOTE(review): no REX prefix, so register numbers 4-7 presumably select ah/ch/dh/bh -- confirm callers
+}
+
+void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64) {
+    // 5 bytes: opcode (with the register number OR-ed in) plus a 32-bit immediate; cpu defaults to i32 to r64, with zero extension
+    asm_x64_write_byte_1(as, OPCODE_MOV_I64_TO_R64 | dest_r64);
+    asm_x64_write_word32(as, src_i32);
+}
+
+void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64) {
+    // cpu defaults to i32 to r64
+    // to mov i64 to r64 need to use REX prefix (10 bytes in total: prefix, opcode, 8-byte immediate)
+    asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_MOV_I64_TO_R64 | dest_r64);
+    asm_x64_write_word64(as, src_i64);
+}
+
+// Load the constant src_i64 into dest_r64 using the shorter of the two
+// encodings: the 32-bit immediate form is usable whenever the upper 32 bits
+// of the value are zero, because that form zero extends.
+void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64) {
+    if (!UNSIGNED_FIT32(src_i64)) {
+        asm_x64_mov_i64_to_r64(as, src_i64, dest_r64); // 10 bytes
+    } else {
+        asm_x64_mov_i32_to_r64(as, src_i64 & 0xffffffff, dest_r64); // 5 bytes
+    }
+}
+
+void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp) {
+    // unfinished: the ModRM/displacement bytes are not emitted, hence the unconditional assert
+    assert(0);
+    asm_x64_write_byte_1(as, OPCODE_MOV_I32_TO_RM32);
+    //asm_x64_write_r32_disp(as, 0, dest_r32, dest_disp);
+    asm_x64_write_word32(as, src_i32);
+}
+
+void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
+    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_XOR_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); // 64 bit xor of src_r64 into dest_r64
+}
+
+void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64) {
+    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_ADD_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); // 64 bit add of src_r64 into dest_r64
+}
+
+// Add the immediate src_i32 to the 32 bit register dest_r32 (no REX prefix is
+// emitted).  An immediate that fits in a signed byte uses the short form,
+// anything else the form with a 4-byte immediate.
+void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32) {
+    assert(dest_r32 != REG_RSP); // in this case i think src_i32 must be 64 bits
+    const int modrm = MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r32); // reg field 0 is the /0 opcode extension
+    if (SIGNED_FIT8(src_i32)) {
+        asm_x64_write_byte_2(as, OPCODE_ADD_I8_TO_RM32, modrm);
+        asm_x64_write_byte_1(as, src_i32 & 0xff);
+    } else {
+        asm_x64_write_byte_2(as, OPCODE_ADD_I32_TO_RM32, modrm);
+        asm_x64_write_word32(as, src_i32);
+    }
+}
+
+void asm_x64_sub_r32_from_r32(asm_x64_t* as, int src_r32, int dest_r32) {
+    const int modrm = MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32); // register-to-register form
+    asm_x64_write_byte_2(as, OPCODE_SUB_R64_FROM_RM64, modrm); // no REX prefix: defaults to 32 bit operation
+}
+
+void asm_x64_sub_r64_from_r64(asm_x64_t* as, int src_r64, int dest_r64) {
+    const int modrm = MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64); // register-to-register form
+    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_R64_FROM_RM64, modrm); // REX prefix for 64 bit operation
+}
+
+void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32) {
+    // defaults to 32 bit operation (no REX prefix); reg field 5 is the /5 opcode extension
+    const int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);
+    if (SIGNED_FIT8(src_i32)) {
+        asm_x64_write_byte_2(as, OPCODE_SUB_I8_FROM_RM64, modrm);
+        asm_x64_write_byte_1(as, src_i32 & 0xff);
+    } else {
+        asm_x64_write_byte_2(as, OPCODE_SUB_I32_FROM_RM64, modrm);
+        asm_x64_write_word32(as, src_i32);
+    }
+}
+
+void asm_x64_sub_i32_from_r64(asm_x64_t* as, int src_i32, int dest_r64) {
+    // use REX prefix for 64 bit operation; reg field 5 is the /5 opcode extension
+    const int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);
+    if (SIGNED_FIT8(src_i32)) {
+        asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_I8_FROM_RM64, modrm);
+        asm_x64_write_byte_1(as, src_i32 & 0xff);
+    } else {
+        asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_I32_FROM_RM64, modrm);
+        asm_x64_write_word32(as, src_i32);
+    }
+}
+
+/* shifts not tested */
+void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm) {
+    asm_x64_write_byte_2(as, OPCODE_SHL_RM32_BY_I8, MODRM_R64(4) | MODRM_RM_REG | MODRM_RM_R64(r32)); // reg field 4 is the /4 opcode extension
+    asm_x64_write_byte_1(as, imm); // shift count; only the low byte of imm is emitted
+}
+
+void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm) {
+    asm_x64_write_byte_2(as, OPCODE_SHR_RM32_BY_I8, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(r32)); // reg field 5 is the /5 opcode extension
+    asm_x64_write_byte_1(as, imm); // shift count; only the low byte of imm is emitted
+}
+
+void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm) {
+    asm_x64_write_byte_2(as, OPCODE_SAR_RM32_BY_I8, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(r32)); // opcode + ModRM (reg field 7), no REX prefix
+    asm_x64_write_byte_1(as, imm); // shift count is emitted as one immediate byte
+}
+
+void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b) {
+    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_CMP_R64_WITH_RM64, MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b)); // REX prefix with REX_W, then opcode and ModRM
+}
+
+void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b) {
+    assert(0); // not implemented: always fails when assertions are enabled
+    asm_x64_write_byte_1(as, OPCODE_CMP_R64_WITH_RM64); // only the opcode byte is written
+    //asm_x64_write_r32_disp(as, src_r32_a, src_r32_b, src_disp_b); // operand encoding is disabled
+}
+
+void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b) {
+    assert(0); // not implemented: always fails when assertions are enabled
+    asm_x64_write_byte_1(as, OPCODE_CMP_RM32_WITH_R32); // only the opcode byte is written
+    //asm_x64_write_r32_disp(as, src_r32_b, src_r32_a, src_disp_a); // operand encoding is disabled
+}
+
+void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32) {
+    if (!(SIGNED_FIT8(src_i32))) {
+        asm_x64_write_byte_2(as, OPCODE_CMP_I32_WITH_RM32, MODRM_RM_REG | MODRM_R64(7) | MODRM_RM_R64(src_r32)); // full 32 bit immediate follows
+        asm_x64_write_word32(as, src_i32);
+    } else {
+        asm_x64_write_byte_2(as, OPCODE_CMP_I8_WITH_RM32, MODRM_RM_REG | MODRM_R64(7) | MODRM_RM_R64(src_r32)); // short form: one immediate byte follows
+        asm_x64_write_byte_1(as, src_i32 & 0xff);
+    }
+}
+
+void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b) {
+    asm_x64_write_byte_2(as, OPCODE_TEST_R8_WITH_RM8, MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b)); // NOTE(review): no REX prefix, so register numbers 4-7 would select AH/CH/DH/BH - confirm callers only pass 0-3
+}
+
+void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8) {
+    asm_x64_write_byte_3(as, OPCODE_SETCC_RM8_A, OPCODE_SETCC_RM8_B | jcc_type, MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r8)); // condition code is OR-ed into the second opcode byte; NOTE(review): no REX prefix, so dest_r8 of 4-7 would select AH/CH/DH/BH - confirm
+}
+
+int asm_x64_label_new(asm_x64_t* as) {
+    return as->next_label++; // hand out the current label number, then advance the counter
+}
+
+void asm_x64_label_assign(asm_x64_t* as, int label) {
+    if (as->pass <= ASM_X64_PASS_1) {
+        return; // nothing is recorded or checked before pass 2
+    }
+    assert(label < as->max_num_labels);
+    if (as->pass == ASM_X64_PASS_3) {
+        // the offset recorded in pass 2 must not have moved in pass 3
+        assert(as->label_offsets[label] == as->code_offset);
+    } else if (as->pass == ASM_X64_PASS_2) {
+        // first placement of this label: it must still be unassigned (-1)
+        assert(as->label_offsets[label] == -1);
+        as->label_offsets[label] = as->code_offset;
+    }
+}
+
+void asm_x64_jmp_label(asm_x64_t* as, int label) {
+    if (as->pass > ASM_X64_PASS_1) { // nothing is emitted here during pass 1
+        int dest = as->label_offsets[label];
+        int rel = dest - as->code_offset; // distance from the start of this instruction to the label
+        if (dest >= 0 && rel < 0) {
+            // label already has an offset and lies behind us: a backwards jump, so its size is known now
+            // calculate rel assuming 8 bit relative jump
+            rel -= 2; // 2 bytes are written for the short form (opcode + rel8)
+            if (SIGNED_FIT8(rel)) {
+                asm_x64_write_byte_2(as, OPCODE_JMP_REL8, rel & 0xff);
+            } else {
+                rel += 2; // undo the short-form adjustment before using the long form
+                goto large_jump;
+            }
+        } else {
+            // is a forwards jump (or the label has no offset yet), so need to assume it's large
+            large_jump:
+            rel -= 5; // 5 bytes are written for the long form (opcode + rel32)
+            asm_x64_write_byte_1(as, OPCODE_JMP_REL32);
+            asm_x64_write_word32(as, rel);
+        }
+    }
+}
+
+void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label) {
+    if (as->pass > ASM_X64_PASS_1) { // nothing is emitted here during pass 1
+        int dest = as->label_offsets[label];
+        int rel = dest - as->code_offset; // distance from the start of this instruction to the label
+        if (dest >= 0 && rel < 0) {
+            // label already has an offset and lies behind us: a backwards jump, so its size is known now
+            // calculate rel assuming 8 bit relative jump
+            rel -= 2; // 2 bytes are written for the short form (opcode + rel8)
+            if (SIGNED_FIT8(rel)) {
+                asm_x64_write_byte_2(as, OPCODE_JCC_REL8 | jcc_type, rel & 0xff);
+            } else {
+                rel += 2; // undo the short-form adjustment before using the long form
+                goto large_jump;
+            }
+        } else {
+            // is a forwards jump (or the label has no offset yet), so need to assume it's large
+            large_jump:
+            rel -= 6; // 6 bytes are written for the long form (two opcode bytes + rel32)
+            asm_x64_write_byte_2(as, OPCODE_JCC_REL32_A, OPCODE_JCC_REL32_B | jcc_type);
+            asm_x64_write_word32(as, rel);
+        }
+    }
+}
+
+void asm_x64_entry(asm_x64_t* as, int num_locals) {
+    // frame set-up: save the caller's RBP, then copy RSP into RBP
+    asm_x64_push_r64(as, REG_RBP);
+    asm_x64_mov_r64_to_r64(as, REG_RSP, REG_RBP);
+    // a negative count is treated as 0; the count is then made odd so stack is aligned on 16 byte boundary
+    int n_slots = (num_locals < 0 ? 0 : num_locals) | 1;
+    asm_x64_sub_i32_from_r64(as, n_slots * WORD_SIZE, REG_RSP);
+    // RBX is saved last, below the locals; asm_x64_exit pops it first
+    asm_x64_push_r64(as, REG_RBX);
+}
+
+void asm_x64_exit(asm_x64_t* as) {
+    asm_x64_pop_r64(as, REG_RBX); // matches the RBX push made last in asm_x64_entry
+    asm_x64_write_byte_1(as, OPCODE_LEAVE); // leave: restores RSP from RBP and pops the saved RBP
+    asm_x64_ret(as);
+}
+
+void asm_x64_push_arg(asm_x64_t* as, int src_arg_num) {
+    assert(0); // not supported: always fails when assertions are enabled
+    asm_x64_push_disp(as, REG_RBP, 8 + src_arg_num * WORD_SIZE); // pushes the word at RBP + 8 + src_arg_num * WORD_SIZE
+}
+
+void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32) {
+    assert(0); // not supported: always fails when assertions are enabled; nothing is emitted
+    //asm_x64_mov_disp_to_r32(as, REG_RBP, 8 + src_arg_num * WORD_SIZE, dest_r32);
+}
+
+void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num) {
+    assert(0); // not supported: always fails when assertions are enabled; nothing is emitted
+    //asm_x64_mov_r32_to_disp(as, src_r32, REG_RBP, 8 + dest_arg_num * WORD_SIZE);
+}
+
+static int asm_x64_local_offset_from_ebp(int local_num) {
+    int slot_index = local_num + 1; // local 0 is one word below the address held in the frame pointer
+    return -slot_index * WORD_SIZE;
+}
+
+void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64) {
+    asm_x64_mov_disp_to_r64(as, REG_RBP, asm_x64_local_offset_from_ebp(src_local_num), dest_r64); // load from the local's slot, addressed relative to RBP
+}
+
+void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num) {
+    asm_x64_mov_r64_to_disp(as, src_r64, REG_RBP, asm_x64_local_offset_from_ebp(dest_local_num)); // store to the local's slot, addressed relative to RBP
+}
+
+void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64) {
+    int disp = asm_x64_local_offset_from_ebp(local_num); // displacement of the local from RBP
+    if (disp != 0) {
+        asm_x64_lea_disp_to_r64(as, REG_RBP, disp, dest_r64); // dest = RBP + disp
+    } else {
+        asm_x64_mov_r64_to_r64(as, REG_RBP, dest_r64); // zero displacement: a plain register copy is used
+    }
+}
+
+void asm_x64_push_local(asm_x64_t* as, int local_num) {
+    asm_x64_push_disp(as, REG_RBP, asm_x64_local_offset_from_ebp(local_num)); // push the value held in the local's slot
+}
+
+void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r64)
+{
+    // push the address of a local, using temp_r64 as scratch; the address is formed with the 64 bit
+    asm_x64_mov_local_addr_to_r64(as, local_num, temp_r64); // mov/lea helper, since a 32 bit add would clear the upper half of the pointer
+    asm_x64_push_r64(as, temp_r64);
+}
+
+/*
+   can't use these because code might be relocated when resized
+
+void asm_x64_call(asm_x64_t* as, void* func)
+{
+    asm_x64_sub_i32_from_r32(as, 8, REG_RSP);
+    asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
+    asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
+    asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP);
+}
+
+void asm_x64_call_i1(asm_x64_t* as, void* func, int i1)
+{
+    asm_x64_sub_i32_from_r32(as, 8, REG_RSP);
+    asm_x64_sub_i32_from_r32(as, 12, REG_RSP);
+    asm_x64_push_i32(as, i1);
+    asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
+    asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
+    asm_x64_add_i32_to_r32(as, 16, REG_RSP);
+    asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP);
+}
+*/
+
+void asm_x64_call_ind(asm_x64_t* as, void *ptr, int temp_r64) {
+    /* disabled alternative: load ptr into temp_r64 and call through that register
+    asm_x64_mov_i64_to_r64_optimised(as, (int64_t)ptr, temp_r64);
+    asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64));
+    */
+    // this reduces code size by 2 bytes per call, but doesn't seem to speed it up at all (temp_r64 is unused on this path)
+    asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
+    asm_x64_write_word32(as, ptr - (void*)(as->code_base + as->code_offset + 4)); // + 4 covers the displacement bytes being written; NOTE(review): only 32 bits are emitted, so ptr presumably must lie within rel32 range of the code - confirm
+}
diff --git a/py/asmx64.h b/py/asmx64.h
new file mode 100644
index 0000000..4871dbf
--- /dev/null
+++ b/py/asmx64.h
@@ -0,0 +1,76 @@
+#define ASM_X64_PASS_1 (1) // label assignment and jumps do nothing in this pass
+#define ASM_X64_PASS_2 (2) // label offsets are recorded in this pass
+#define ASM_X64_PASS_3 (3) // label offsets are checked against pass 2 in this pass
+
+#define REG_RAX (0)
+#define REG_RCX (1)
+#define REG_RDX (2)
+#define REG_RBX (3)
+#define REG_RSP (4)
+#define REG_RBP (5)
+#define REG_RSI (6)
+#define REG_RDI (7)
+
+// condition codes, used for jcc and setcc (despite their j-name!)
+#define JCC_JB  (0x2) // below, unsigned
+#define JCC_JZ  (0x4)
+#define JCC_JE  (0x4)
+#define JCC_JNZ (0x5)
+#define JCC_JNE (0x5)
+#define JCC_JL  (0xc) // less, signed
+
+#define REG_RET REG_RAX
+#define REG_ARG_1 REG_RDI
+#define REG_ARG_2 REG_RSI
+#define REG_ARG_3 REG_RDX
+
+typedef struct _asm_x64_t asm_x64_t;
+
+asm_x64_t* asm_x64_new();
+void asm_x64_free(asm_x64_t* as, bool free_code);
+void asm_x64_start_pass(asm_x64_t *as, int pass);
+void asm_x64_end_pass(asm_x64_t *as);
+uint asm_x64_get_code_size(asm_x64_t* as);
+void* asm_x64_get_code(asm_x64_t* as);
+
+void asm_x64_nop(asm_x64_t* as);
+void asm_x64_push_r64(asm_x64_t* as, int src_r64);
+void asm_x64_push_i32(asm_x64_t* as, int src_i32); // will be sign extended to 64 bits
+void asm_x64_push_disp(asm_x64_t* as, int src_r32, int src_offset);
+void asm_x64_pop_r64(asm_x64_t* as, int dest_r64);
+void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
+void asm_x64_mov_r32_to_disp(asm_x64_t* as, int src_r32, int dest_r32, int dest_disp);
+void asm_x64_mov_disp_to_r32(asm_x64_t* as, int src_r32, int src_disp, int dest_r32);
+void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64);
+void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64);
+void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp);
+void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64);
+void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64);
+void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
+void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32);
+void asm_x64_sub_r32_from_r32(asm_x64_t* as, int src_r32, int dest_r32);
+void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32);
+void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm);
+void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm);
+void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm);
+void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b);
+void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b);
+void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b);
+void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32);
+void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b);
+void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8);
+int asm_x64_label_new(asm_x64_t* as);
+void asm_x64_label_assign(asm_x64_t* as, int label);
+void asm_x64_jmp_label(asm_x64_t* as, int label);
+void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label);
+void asm_x64_entry(asm_x64_t* as, int num_locals);
+void asm_x64_exit(asm_x64_t* as);
+void asm_x64_push_arg(asm_x64_t* as, int src_arg_num);
+void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32);
+void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num);
+void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64);
+void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num);
+void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64);
+void asm_x64_push_local(asm_x64_t* as, int local_num);
+void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r64);
+void asm_x64_call_ind(asm_x64_t* as, void* ptr, int temp_r64);
diff --git a/py/bc.c b/py/bc.c
new file mode 100644
index 0000000..1edd911
--- /dev/null
+++ b/py/bc.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "machine.h"
+#include "runtime.h"
+#include "bc.h"
+
+#define DECODE_UINT do { unum = *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0) // read a 1 or 2 byte operand at ip into unum; a first byte above 127 means a second byte follows
+#define DECODE_QSTR do { qstr = *ip++; if (qstr > 127) { qstr = ((qstr & 0x3f) << 8) | (*ip++); } } while (0) // same encoding as DECODE_UINT, result goes into qstr
+#define PUSH(val) *--sp = (val) // stack grows down: move sp down one slot, then store
+#define POP() (*sp++) // yield the top of stack, then move sp up one slot
+
+py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args) { // interpret byte code starting at code until PYBC_RETURN_VALUE; len is not read here
+    byte *ip = code;
+    py_obj_t stack[10]; // fixed-size value stack; NOTE(review): PUSH does no overflow check
+    py_obj_t *sp = &stack[10]; // stack grows down, sp points to top of stack
+    machine_uint_t unum;
+    machine_int_t snum;
+    qstr qstr;
+    py_obj_t obj1, obj2;
+    py_obj_t fast0 = NULL, fast1 = NULL, fast2 = NULL, fastn[4] = {NULL, NULL, NULL, NULL}; // locals 0-2, then locals 3-6 in fastn[]
+
+    // init args: argument i becomes local i
+    for (int i = 0; i < n_args; i++) {
+        if (i == 0) {
+            fast0 = args[0];
+        } else if (i == 1) {
+            fast1 = args[1];
+        } else if (i == 2) {
+            fast2 = args[2];
+        } else {
+            assert(i - 3 < 4); // at most 7 arguments fit
+            fastn[i - 3] = args[i];
+        }
+    }
+
+    // execute byte code
+    for (;;) {
+        int op = *ip++;
+        switch (op) {
+            case PYBC_LOAD_CONST_FALSE:
+                PUSH(py_const_false);
+                break;
+
+            case PYBC_LOAD_CONST_NONE:
+                PUSH(py_const_none);
+                break;
+
+            case PYBC_LOAD_CONST_TRUE:
+                PUSH(py_const_true);
+                break;
+
+            case PYBC_LOAD_CONST_SMALL_INT:
+                snum = ip[0] | (ip[1] << 8); // operand is 2 bytes, low byte first
+                if (snum & 0x8000) {
+                    snum |= ~0xffff; // sign extend from 16 bits
+                }
+                ip += 2;
+                PUSH((py_obj_t)(snum << 1 | 1)); // pushed as (value << 1) | 1
+                break;
+
+            case PYBC_LOAD_CONST_ID:
+                DECODE_QSTR;
+                PUSH(rt_load_const_str(qstr)); // TODO
+                break;
+
+            case PYBC_LOAD_CONST_STRING:
+                DECODE_QSTR;
+                PUSH(rt_load_const_str(qstr));
+                break;
+
+            case PYBC_LOAD_FAST_0:
+                PUSH(fast0);
+                break;
+
+            case PYBC_LOAD_FAST_1:
+                PUSH(fast1);
+                break;
+
+            case PYBC_LOAD_FAST_2:
+                PUSH(fast2);
+                break;
+
+            case PYBC_LOAD_FAST_N:
+                DECODE_UINT;
+                PUSH(fastn[unum - 3]); // NOTE(review): unum - 3 is not range checked against fastn[4]
+                break;
+
+            case PYBC_LOAD_NAME:
+                DECODE_QSTR;
+                PUSH(rt_load_name(qstr));
+                break;
+
+            case PYBC_LOAD_GLOBAL:
+                DECODE_QSTR;
+                PUSH(rt_load_global(qstr));
+                break;
+
+            case PYBC_LOAD_ATTR:
+                DECODE_QSTR;
+                *sp = rt_load_attr(*sp, qstr); // top of stack is replaced by the result
+                break;
+
+            case PYBC_LOAD_METHOD:
+                DECODE_QSTR;
+                sp -= 1; // open one more slot; the object is now at sp[1]
+                rt_load_method(sp[1], qstr, sp); // rt_load_method is handed sp as its destination
+                break;
+
+            case PYBC_LOAD_BUILD_CLASS:
+                PUSH(rt_load_build_class());
+                break;
+
+            case PYBC_STORE_FAST_0:
+                fast0 = POP();
+                break;
+
+            case PYBC_STORE_FAST_1:
+                fast1 = POP();
+                break;
+
+            case PYBC_STORE_FAST_2:
+                fast2 = POP();
+                break;
+
+            case PYBC_STORE_FAST_N:
+                DECODE_UINT;
+                fastn[unum - 3] = POP(); // NOTE(review): unum - 3 is not range checked against fastn[4]
+                break;
+
+            case PYBC_STORE_NAME:
+                DECODE_QSTR;
+                rt_store_name(qstr, POP());
+                break;
+
+            case PYBC_STORE_SUBSCR:
+                rt_store_subscr(sp[1], sp[0], sp[2]);
+                sp += 3; // all three operands are removed
+                break;
+
+            case PYBC_DUP_TOP:
+                obj1 = *sp;
+                PUSH(obj1);
+                break;
+
+            case PYBC_DUP_TOP_TWO:
+                sp -= 2; // copy the top two entries, keeping their order
+                sp[0] = sp[2];
+                sp[1] = sp[3];
+                break;
+
+            case PYBC_POP_TOP:
+                ++sp;
+                break;
+
+            case PYBC_ROT_THREE:
+                obj1 = sp[0]; // old top ends up third, the other two move up one
+                sp[0] = sp[1];
+                sp[1] = sp[2];
+                sp[2] = obj1;
+                break;
+
+            case PYBC_JUMP:
+                DECODE_UINT;
+                ip = code + unum; // target is an offset from the start of code
+                break;
+
+            case PYBC_POP_JUMP_IF_FALSE:
+                DECODE_UINT;
+                if (!rt_is_true(POP())) {
+                    ip = code + unum; // target is an offset from the start of code
+                }
+                break;
+
+            case PYBC_SETUP_LOOP:
+                DECODE_UINT; // operand is decoded and then ignored
+                break;
+
+            case PYBC_POP_BLOCK: // no action is taken
+                break;
+
+            case PYBC_BINARY_OP:
+                unum = *ip++; // single byte operand selects the operation
+                obj2 = POP();
+                obj1 = *sp;
+                *sp = rt_binary_op(unum, obj1, obj2);
+                break;
+
+            case PYBC_COMPARE_OP:
+                unum = *ip++; // single byte operand selects the comparison
+                obj2 = POP();
+                obj1 = *sp;
+                *sp = rt_compare_op(unum, obj1, obj2);
+                break;
+
+            case PYBC_BUILD_LIST:
+                DECODE_UINT;
+                obj1 = rt_build_list(unum, sp);
+                sp += unum - 1; // the unum items are replaced by one result slot
+                *sp = obj1;
+                break;
+
+            case PYBC_BUILD_MAP:
+                DECODE_UINT;
+                PUSH(rt_build_map(unum));
+                break;
+
+            case PYBC_STORE_MAP:
+                sp += 2; // two entries are removed; the new top is passed as the first argument
+                rt_store_map(sp[0], sp[-2], sp[-1]);
+                break;
+
+            case PYBC_BUILD_SET:
+                DECODE_UINT;
+                obj1 = rt_build_set(unum, sp);
+                sp += unum - 1; // the unum items are replaced by one result slot
+                *sp = obj1;
+                break;
+
+            case PYBC_MAKE_FUNCTION:
+                DECODE_UINT;
+                PUSH(rt_make_function_from_id(unum));
+                break;
+
+            case PYBC_CALL_FUNCTION:
+                DECODE_UINT;
+                assert((unum & 0xff00) == 0); // n_keyword
+                // switch on n_positional; only 0, 1 or 2 positional arguments are handled
+                if ((unum & 0xff) == 0) {
+                    *sp = rt_call_function_0(*sp);
+                } else if ((unum & 0xff) == 1) {
+                    obj1 = *sp++; // the single argument
+                    *sp = rt_call_function_1(*sp, obj1);
+                } else if ((unum & 0xff) == 2) {
+                    obj2 = *sp++; // the second argument
+                    obj1 = *sp++; // the first argument
+                    *sp = rt_call_function_2(*sp, obj1, obj2);
+                } else {
+                    assert(0);
+                }
+                break;
+
+            case PYBC_CALL_METHOD:
+                DECODE_UINT;
+                assert((unum & 0xff00) == 0); // n_keyword
+                // switch on n_positional; only 0 or 1 positional arguments are handled
+                if ((unum & 0xff) == 0) {
+                    obj1 = *sp++; // the self object (or NULL)
+                    *sp = rt_call_method_1(*sp, obj1);
+                } else if ((unum & 0xff) == 1) {
+                    obj2 = *sp++; // the first argument
+                    obj1 = *sp++; // the self object (or NULL)
+                    *sp = rt_call_function_2(*sp, obj1, obj2); // NOTE(review): the 0-argument case uses rt_call_method_1; confirm rt_call_function_2 copes with a NULL self
+                } else {
+                    assert(0);
+                }
+                break;
+
+            case PYBC_RETURN_VALUE:
+                return *sp; // the result is the current top of stack
+
+            default:
+                printf("code %p, offset %u, byte code 0x%02x not implemented\n", code, (uint)(ip - code), op);
+                assert(0);
+                return py_const_none; // reached only when assertions are disabled
+        }
+    }
+}
diff --git a/py/bc.h b/py/bc.h
new file mode 100644
index 0000000..f09843a
--- /dev/null
+++ b/py/bc.h
@@ -0,0 +1,97 @@
+#define PYBC_LOAD_CONST_FALSE   (0x10)
+#define PYBC_LOAD_CONST_NONE    (0x11)
+#define PYBC_LOAD_CONST_TRUE    (0x12)
+#define PYBC_LOAD_CONST_SMALL_INT   (0x13) // int: 2 bytes, low byte first, signed 16 bit
+#define PYBC_LOAD_CONST_INT     (0x14) // qstr
+#define PYBC_LOAD_CONST_DEC     (0x15) // qstr
+#define PYBC_LOAD_CONST_ID      (0x16) // qstr
+#define PYBC_LOAD_CONST_BYTES   (0x17) // qstr
+#define PYBC_LOAD_CONST_STRING  (0x18) // qstr
+
+#define PYBC_LOAD_FAST_0        (0x20)
+#define PYBC_LOAD_FAST_1        (0x21)
+#define PYBC_LOAD_FAST_2        (0x22)
+#define PYBC_LOAD_FAST_N        (0x23) // uint
+#define PYBC_LOAD_NAME          (0x24) // qstr
+#define PYBC_LOAD_GLOBAL        (0x25) // qstr
+#define PYBC_LOAD_ATTR          (0x26) // qstr
+#define PYBC_LOAD_METHOD        (0x27) // qstr
+#define PYBC_LOAD_BUILD_CLASS   (0x28)
+
+#define PYBC_STORE_FAST_0       (0x30)
+#define PYBC_STORE_FAST_1       (0x31)
+#define PYBC_STORE_FAST_2       (0x32)
+#define PYBC_STORE_FAST_N       (0x33) // uint
+#define PYBC_STORE_NAME         (0x34) // qstr
+#define PYBC_STORE_GLOBAL       (0x35) // qstr
+#define PYBC_STORE_ATTR         (0x36) // qstr
+#define PYBC_STORE_LOCALS       (0x37)
+#define PYBC_STORE_SUBSCR       (0x38)
+
+#define PYBC_DELETE_FAST_N      (0x39) // uint
+#define PYBC_DELETE_NAME        (0x3a) // qstr
+#define PYBC_DELETE_GLOBAL      (0x3b) // qstr
+#define PYBC_DELETE_DEREF       (0x3c) // qstr
+#define PYBC_DELETE_ATTR        (0x3d) // qstr
+#define PYBC_DELETE_SUBSCR      (0x3e)
+
+#define PYBC_DUP_TOP            (0x40)
+#define PYBC_DUP_TOP_TWO        (0x41)
+#define PYBC_POP_TOP            (0x42)
+#define PYBC_ROT_TWO            (0x43)
+#define PYBC_ROT_THREE          (0x44)
+#define PYBC_JUMP               (0x45) // pos
+#define PYBC_POP_JUMP_IF_TRUE   (0x46) // pos
+#define PYBC_POP_JUMP_IF_FALSE  (0x47) // pos
+#define PYBC_JUMP_IF_TRUE_OR_POP    (0x48) // pos
+#define PYBC_JUMP_IF_FALSE_OR_POP   (0x49) // pos
+#define PYBC_SETUP_LOOP         (0x4a) // pos
+#define PYBC_BREAK_LOOP         (0x4b) // pos
+#define PYBC_CONTINUE_LOOP      (0x4c) // pos
+#define PYBC_SETUP_WITH         (0x4d) // pos
+#define PYBC_WITH_CLEANUP       (0x4e)
+#define PYBC_SETUP_EXCEPT       (0x4f) // pos
+#define PYBC_SETUP_FINALLY      (0x50) // pos
+#define PYBC_END_FINALLY        (0x51)
+#define PYBC_GET_ITER           (0x52)
+#define PYBC_FOR_ITER           (0x53) // pos
+#define PYBC_POP_BLOCK          (0x54)
+#define PYBC_POP_EXCEPT         (0x55)
+
+#define PYBC_UNARY_OP           (0x60) // byte
+#define PYBC_BINARY_OP          (0x61) // byte
+#define PYBC_COMPARE_OP         (0x62) // byte
+
+#define PYBC_BUILD_TUPLE        (0x70) // uint
+#define PYBC_BUILD_LIST         (0x71) // uint
+#define PYBC_LIST_APPEND        (0x72) // uint
+#define PYBC_BUILD_MAP          (0x73) // uint
+#define PYBC_STORE_MAP          (0x74)
+#define PYBC_MAP_ADD            (0x75) // uint
+#define PYBC_BUILD_SET          (0x76) // uint
+#define PYBC_SET_ADD            (0x77) // uint
+#define PYBC_BUILD_SLICE        (0x78) // uint
+#define PYBC_UNPACK_SEQUENCE    (0x79) // uint
+#define PYBC_UNPACK_EX          (0x7a) // uint
+
+#define PYBC_RETURN_VALUE       (0x80)
+#define PYBC_RAISE_VARARGS      (0x81) // uint
+#define PYBC_YIELD_VALUE        (0x82)
+#define PYBC_YIELD_FROM         (0x83)
+
+#define PYBC_MAKE_FUNCTION      (0x90) // uint
+#define PYBC_MAKE_CLOSURE       (0x91) // uint?
+#define PYBC_CALL_FUNCTION      (0x92) // uint: low byte n_positional, next byte n_keyword
+#define PYBC_CALL_FUNCTION_VAR  (0x93) // uint
+#define PYBC_CALL_FUNCTION_KW   (0x94) // uint
+#define PYBC_CALL_FUNCTION_VAR_KW   (0x95) // uint
+#define PYBC_CALL_METHOD        (0x96) // uint: low byte n_positional, next byte n_keyword
+#define PYBC_CALL_METHOD_VAR    (0x97) // uint
+#define PYBC_CALL_METHOD_KW     (0x98) // uint
+#define PYBC_CALL_METHOD_VAR_KW (0x99) // uint
+
+#define PYBC_IMPORT_NAME (0xe0)
+#define PYBC_IMPORT_FROM (0xe1)
+#define PYBC_IMPORT_STAR (0xe2)
+
+py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args); // runs the byte code at code with args as the initial locals
diff --git a/py/compile.c b/py/compile.c
new file mode 100644
index 0000000..0e6ce44
--- /dev/null
+++ b/py/compile.c
@@ -0,0 +1,2510 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+#include "compile.h"
+#include "runtime.h"
+#include "emit.h"
+
+// TODO need to mangle __attr names
+
+typedef enum { // parse node kinds: one PN_<rule> enumerator per DEF_RULE entry in grammar.h
+    PN_none = 0,
+#define DEF_RULE(rule, comp, kind, arg...) PN_##rule,
+#include "grammar.h"
+#undef DEF_RULE
+    PN_maximum_number_of, // follows the last rule, so it equals the number of enumerators before it
+} pn_kind_t;
+
+#define EMIT(fun, arg...) (emit_##fun(comp->emit, ##arg)) // calls emit_<fun> with comp->emit as first argument; needs a variable named comp in scope
+
+typedef struct _compiler_t { // compiler state passed (as comp) to the compile functions
+    qstr qstr___class__; // presumably the interned names of the matching identifiers - TODO confirm
+    qstr qstr___locals__;
+    qstr qstr___name__;
+    qstr qstr___module__;
+    qstr qstr___qualname__;
+    qstr qstr___doc__;
+    qstr qstr_assertion_error;
+
+    pass_kind_t pass;
+
+    int break_label;
+    int continue_label;
+    int except_nest_level;
+
+    int n_arg_keyword;
+    bool have_star_arg;
+    bool have_dbl_star_arg;
+    bool have_bare_star;
+    int param_pass;
+    int param_pass_num_dict_params;
+    int param_pass_num_default_params;
+
+    scope_t *scope_head; // first scope of the list that scope_new_and_link appends to
+    scope_t *scope_cur; // scope_new_and_link makes this the parent of each new scope
+
+    emitter_t *emit; // handed to every emit_* call made through EMIT()
+} compiler_t;
+
+// Fold constant sub-expressions of a parse tree, bottom-up: shifts, +, -, *, unary +/-/~ and ** applied
+// to small-int leaves are replaced by a single small-int leaf.  Child slots of struct nodes are updated
+// in place and the (possibly new) node standing for pn is returned; unfoldable nodes are returned as is.
+py_parse_node_t fold_constants(py_parse_node_t pn) {
+    if (PY_PARSE_NODE_IS_STRUCT(pn)) {
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+        int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+
+        // fold arguments first
+        for (int i = 0; i < n; i++) {
+            pns->nodes[i] = fold_constants(pns->nodes[i]);
+        }
+
+        switch (PY_PARSE_NODE_STRUCT_KIND(pns)) {
+            case PN_shift_expr:
+                if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
+                    int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+                    int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
+                    if (arg1 < 0 || arg1 >= (int)(sizeof(int) * 8)) {
+                        ; // shifting an int by this count is undefined behaviour in C: leave the node unfolded
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_DBL_LESS)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 << arg1); // XXX can overflow; enabled only to compare with CPython
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_DBL_MORE)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 >> arg1);
+                    } else {
+                        assert(0); // shouldn't happen
+                    }
+                }
+                break;
+
+            case PN_arith_expr:
+                // XXX can overflow; enabled only to compare with CPython
+                if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
+                    int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+                    int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
+                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_PLUS)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 + arg1);
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_MINUS)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 - arg1);
+                    } else {
+                        assert(0); // shouldn't happen
+                    }
+                }
+                break;
+
+            case PN_term:
+                // XXX can overflow; enabled only to compare with CPython
+                if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
+                    int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+                    int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
+                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_STAR)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 * arg1);
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_SLASH)) {
+                        ; // pass: division is not folded
+                    } else {
+                        assert(0); // shouldn't happen; NOTE(review): confirm no other operator token can reach a term node
+                    }
+                }
+                break;
+
+            case PN_factor_2:
+                if (PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[1])) {
+                    machine_int_t arg = PY_PARSE_NODE_LEAF_ARG(pns->nodes[1]);
+                    if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_PLUS)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg);
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_MINUS)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, -arg);
+                    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_TILDE)) {
+                        pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, ~arg);
+                    } else {
+                        assert(0); // shouldn't happen
+                    }
+                }
+                break;
+
+            case PN_power:
+                // XXX can overflow; enabled only to compare with CPython
+                if (PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_NULL(pns->nodes[1]) && !PY_PARSE_NODE_IS_NULL(pns->nodes[2])) {
+                    // NOTE(review): nodes[2] is cast to a struct node without a kind check - confirm it is always one
+                    py_parse_node_struct_t* pns2 = (py_parse_node_struct_t*)pns->nodes[2];
+                    if (PY_PARSE_NODE_IS_SMALL_INT(pns2->nodes[0])) {
+                        int power = PY_PARSE_NODE_LEAF_ARG(pns2->nodes[0]);
+                        if (power >= 0) {
+                            int ans = 1;
+                            int base = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+                            for (; power > 0; power--) {
+                                ans *= base;
+                            }
+                            pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, ans);
+                        }
+                    }
+                }
+                break;
+        }
+    }
+
+    return pn;
+}
+
+void compile_node(compiler_t *comp, py_parse_node_t pn);
+
+scope_t *scope_new_and_link(compiler_t *comp, scope_kind_t kind, py_parse_node_t pn) {
+    // make a new scope whose parent is comp->scope_cur
+    scope_t *new_scope = scope_new(kind, pn);
+    new_scope->parent = comp->scope_cur;
+    new_scope->next = NULL;
+    // walk to the terminating NULL link of the list rooted at comp->scope_head
+    // (which is scope_head itself when the list is empty) and hook the scope in there
+    scope_t **link = &comp->scope_head;
+    while (*link != NULL) {
+        link = &(*link)->next;
+    }
+    *link = new_scope;
+    // hand the freshly linked scope back to the caller
+    return new_scope;
+}
+
+int list_len(py_parse_node_t pn, int pn_kind) {
+    // a null node is an empty list
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        return 0;
+    }
+    // a leaf is a list of one
+    if (PY_PARSE_NODE_IS_LEAF(pn)) {
+        return 1;
+    }
+    // a struct of kind pn_kind has one item per child; any other struct is a list of one
+    py_parse_node_struct_t *node = (py_parse_node_struct_t*)pn;
+    bool is_list = PY_PARSE_NODE_STRUCT_KIND(node) == pn_kind;
+    return is_list ? PY_PARSE_NODE_STRUCT_NUM_NODES(node) : 1;
+}
+
+void apply_to_single_or_list(compiler_t *comp, py_parse_node_t pn, int pn_list_kind, void (*f)(compiler_t*, py_parse_node_t)) {
+    bool is_list = PY_PARSE_NODE_IS_STRUCT(pn) && PY_PARSE_NODE_STRUCT_KIND((py_parse_node_struct_t*)pn) == pn_list_kind;
+    if (is_list) {
+        py_parse_node_struct_t *list = (py_parse_node_struct_t*)pn;
+        for (int idx = 0, count = PY_PARSE_NODE_STRUCT_NUM_NODES(list); idx < count; idx++) {
+            f(comp, list->nodes[idx]); // apply f to each element in order
+        }
+    } else if (!PY_PARSE_NODE_IS_NULL(pn)) {
+        f(comp, pn); // a lone non-null node is handed to f directly
+    }
+}
+
+int list_get(py_parse_node_t *pn, int pn_kind, py_parse_node_t **nodes) {
+    // null node: an empty list
+    if (PY_PARSE_NODE_IS_NULL(*pn)) {
+        *nodes = NULL;
+        return 0;
+    }
+    // struct node of the requested kind: expose its children directly
+    if (!(PY_PARSE_NODE_IS_LEAF(*pn))) {
+        py_parse_node_struct_t *list = (py_parse_node_struct_t*)(*pn);
+        if (PY_PARSE_NODE_STRUCT_KIND(list) == pn_kind) {
+            *nodes = list->nodes;
+            return PY_PARSE_NODE_STRUCT_NUM_NODES(list);
+        }
+    }
+    // a leaf, or a struct of another kind: a one-element list made of the node itself
+    *nodes = pn;
+    return 1;
+}
+
+void compile_do_nothing(compiler_t *comp, py_parse_node_struct_t *pns) { // intentionally empty: compiling this node emits nothing
+}
+
+// Compiles every child node of pns, from first to last.
+void compile_generic_all_nodes(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int count = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    int idx = 0;
+    while (idx < count) {
+        compile_node(comp, pns->nodes[idx]);
+        idx += 1;
+    }
+}
+
+// A node may take part in a constant tuple only if it is a leaf node that is
+// not an identifier.
+bool c_tuple_is_const(py_parse_node_t pn) {
+    return PY_PARSE_NODE_IS_LEAF(pn) && !PY_PARSE_NODE_IS_ID(pn);
+}
+
+// Emits the verbatim text of one element of a constant tuple.
+// pn must be a leaf node that c_tuple_is_const accepts (i.e. not an identifier);
+// the only tokens supported are the keywords False, None and True.
+void c_tuple_emit_const(compiler_t *comp, py_parse_node_t pn) {
+    assert(PY_PARSE_NODE_IS_LEAF(pn));
+    int arg = PY_PARSE_NODE_LEAF_ARG(pn);
+    switch (PY_PARSE_NODE_LEAF_KIND(pn)) {
+        case PY_PARSE_NODE_ID:
+            // identifiers are never constant, so callers must not pass one.
+            // The break matters when assert is compiled out (NDEBUG): without
+            // it control would fall into the SMALL_INT case and emit the
+            // identifier's qstr as if it were an integer value.
+            assert(0);
+            break;
+        case PY_PARSE_NODE_SMALL_INT: EMIT(load_const_verbatim_int, arg); break;
+        // integers and decimals are emitted as their source text
+        case PY_PARSE_NODE_INTEGER: EMIT(load_const_verbatim_str, qstr_str(arg)); break;
+        case PY_PARSE_NODE_DECIMAL: EMIT(load_const_verbatim_str, qstr_str(arg)); break;
+        // the final argument selects bytes (true) versus str (false) quoting
+        case PY_PARSE_NODE_STRING: EMIT(load_const_verbatim_quoted_str, arg, false); break;
+        case PY_PARSE_NODE_BYTES: EMIT(load_const_verbatim_quoted_str, arg, true); break;
+        case PY_PARSE_NODE_TOKEN:
+            switch (arg) {
+                case PY_TOKEN_KW_FALSE: EMIT(load_const_verbatim_str, "False"); break;
+                case PY_TOKEN_KW_NONE: EMIT(load_const_verbatim_str, "None"); break;
+                case PY_TOKEN_KW_TRUE: EMIT(load_const_verbatim_str, "True"); break;
+                default: assert(0); break;
+            }
+            break;
+        default: assert(0); break;
+    }
+}
+
+// Builds a tuple from an optional leading item pn (may be null) followed by the
+// children of an optional list node pns_list (may be NULL).
+// All tuple creation is funnelled through here, and the constant handling
+// exists, purely so that the output agrees with CPython: a non-empty tuple
+// made only of constants is emitted as one verbatim constant, anything else is
+// compiled item by item and finished with build_tuple.
+void c_tuple(compiler_t *comp, py_parse_node_t pn, py_parse_node_struct_t *pns_list) {
+    bool have_first = !PY_PARSE_NODE_IS_NULL(pn);
+    int n_list = (pns_list == NULL) ? 0 : PY_PARSE_NODE_STRUCT_NUM_NODES(pns_list);
+    int total = n_list + (have_first ? 1 : 0);
+
+    // the tuple is constant only if every single item is
+    bool all_const = !have_first || c_tuple_is_const(pn);
+    for (int idx = 0; all_const && idx < n_list; idx++) {
+        all_const = c_tuple_is_const(pns_list->nodes[idx]);
+    }
+
+    if (total == 0 || !all_const) {
+        // general case: push every item, then build the tuple at runtime
+        if (have_first) {
+            compile_node(comp, pn);
+        }
+        for (int idx = 0; idx < n_list; idx++) {
+            compile_node(comp, pns_list->nodes[idx]);
+        }
+        EMIT(build_tuple, total);
+        return;
+    }
+
+    // constant case: spell the tuple out as text, "(a, b, c)" or "(a,)"
+    int n_emitted = 0;
+    EMIT(load_const_verbatim_start);
+    EMIT(load_const_verbatim_str, "(");
+    if (have_first) {
+        c_tuple_emit_const(comp, pn);
+        n_emitted += 1;
+    }
+    for (int idx = 0; idx < n_list; idx++) {
+        if (n_emitted > 0) {
+            EMIT(load_const_verbatim_str, ", ");
+        }
+        c_tuple_emit_const(comp, pns_list->nodes[idx]);
+        n_emitted += 1;
+    }
+    // a 1-tuple needs its trailing comma
+    EMIT(load_const_verbatim_str, total == 1 ? ",)" : ")");
+    EMIT(load_const_verbatim_end);
+}
+
+// Compiles a simple tuple expression whose items are all children of pns.
+// There is no separate leading item, so a null node is passed for it.
+void compile_generic_tuple(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t no_leading_item = PY_PARSE_NODE_NULL;
+    c_tuple(comp, no_leading_item, pns);
+}
+
+bool node_is_const_false(py_parse_node_t pn) { // true only when pn is the keyword token False
+    return PY_PARSE_NODE_IS_TOKEN_KIND(pn, PY_TOKEN_KW_FALSE);
+    // untested: || (PY_PARSE_NODE_IS_SMALL_INT(pn) && PY_PARSE_NODE_LEAF_ARG(pn) == 0); NOTE(review): this note used to compare against 1, which is the value node_is_const_true accepts -- presumably 0 was meant; confirm before enabling
+}
+
+// True when pn is the keyword token True, or the small-int literal 1.
+bool node_is_const_true(py_parse_node_t pn) {
+    if (PY_PARSE_NODE_IS_TOKEN_KIND(pn, PY_TOKEN_KW_TRUE)) {
+        return true;
+    }
+    if (!PY_PARSE_NODE_IS_SMALL_INT(pn)) {
+        return false;
+    }
+    return PY_PARSE_NODE_LEAF_ARG(pn) == 1;
+}
+
+// Emits code that evaluates condition pn and jumps to label when its truth
+// value equals jump_if; otherwise execution falls through.
+// Constant True/False conditions, "or" and "and" chains, and an outermost "not"
+// are compiled into jumps directly instead of computing a boolean value.
+// is_nested is true for operands reached through recursion; it exists (along
+// with this helper itself) purely to match CPython, which doesn't fully
+// optimise not's.
+void c_if_cond_2(compiler_t *comp, py_parse_node_t pn, bool jump_if, int label, bool is_nested) {
+    // constant conditions: either an unconditional jump or nothing at all
+    if (node_is_const_false(pn)) {
+        if (jump_if == false) {
+            EMIT(jump, label);
+        }
+        return;
+    }
+    if (node_is_const_true(pn)) {
+        if (jump_if == true) {
+            EMIT(jump, label);
+        }
+        return;
+    }
+
+    if (PY_PARSE_NODE_IS_STRUCT(pn)) {
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+        int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+        bool is_or = PY_PARSE_NODE_STRUCT_KIND(pns) == PN_or_test;
+        bool is_and = PY_PARSE_NODE_STRUCT_KIND(pns) == PN_and_test;
+
+        if (is_or || is_and) {
+            // An "or" is decided as soon as one operand is true, an "and" as
+            // soon as one operand is false.  If that deciding value is also
+            // the one we jump on, every operand can jump straight to label.
+            bool direct = is_or ? (jump_if != false) : (jump_if == false);
+            if (direct) {
+                for (int i = 0; i < n; i++) {
+                    c_if_cond_2(comp, pns->nodes[i], is_or, label, true);
+                }
+            } else {
+                // Otherwise a deciding operand must skip the rest of the
+                // chain, and only the last operand can reach label.
+                int label_skip = EMIT(label_new);
+                for (int i = 0; i < n - 1; i++) {
+                    c_if_cond_2(comp, pns->nodes[i], is_or, label_skip, true);
+                }
+                c_if_cond_2(comp, pns->nodes[n - 1], !is_or, label, true);
+                EMIT(label_assign, label_skip);
+            }
+            return;
+        }
+
+        if (!is_nested && PY_PARSE_NODE_STRUCT_KIND(pns) == PN_not_test_2) {
+            // an outermost "not": test the operand with the jump sense flipped
+            c_if_cond_2(comp, pns->nodes[0], !jump_if, label, true);
+            return;
+        }
+    }
+
+    // nothing special: compute the value and do a conditional jump on it
+    compile_node(comp, pn);
+    if (jump_if == false) {
+        EMIT(pop_jump_if_false, label);
+    } else {
+        EMIT(pop_jump_if_true, label);
+    }
+}
+
+void c_if_cond(compiler_t *comp, py_parse_node_t pn, bool jump_if, int label) { // emit code that jumps to label when condition pn evaluates to jump_if
+    c_if_cond_2(comp, pn, jump_if, label, false); // is_nested = false: pn is the outermost condition, not an operand reached by recursion
+}
+
+typedef enum { ASSIGN_STORE, ASSIGN_AUG_LOAD, ASSIGN_AUG_STORE } assign_kind_t; // plain store / load half of an augmented assignment / store half of an augmented assignment
+void c_assign(compiler_t *comp, py_parse_node_t pn, assign_kind_t kind); // forward declaration: c_assign and the c_assign_* helpers below call each other
+
+void c_assign_power(compiler_t *comp, py_parse_node_struct_t *pns, assign_kind_t assign_kind) { // assignment whose target is a PN_power node: a base (nodes[0]) followed by one or more trailers (nodes[1]), the last of which must be a subscript or attribute
+    if (assign_kind != ASSIGN_AUG_STORE) { // skipped for the store half of an augmented assignment; presumably the object is still on the stack from the ASSIGN_AUG_LOAD half (see dup_top / dup_top_two below)
+        compile_node(comp, pns->nodes[0]);
+    }
+
+    if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+        py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1];
+        if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_power_trailers) { // several trailers: all but the last are ordinary loads, the last one is the actual target
+            int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1);
+            if (assign_kind != ASSIGN_AUG_STORE) {
+                for (int i = 0; i < n - 1; i++) {
+                    compile_node(comp, pns1->nodes[i]);
+                }
+            }
+            assert(PY_PARSE_NODE_IS_STRUCT(pns1->nodes[n - 1]));
+            pns1 = (py_parse_node_struct_t*)pns1->nodes[n - 1]; // from here on pns1 is the final trailer
+        }
+        if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_paren) {
+            printf("SyntaxError: can't assign to function call\n");
+            return;
+        } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_bracket) { // target is a subscript: obj[index]
+            if (assign_kind == ASSIGN_AUG_STORE) {
+                EMIT(rot_three); // reorder the top three stack entries before the store; presumably moves the computed value below the duplicated obj/index pair -- confirm against the emitter
+                EMIT(store_subscr);
+            } else {
+                compile_node(comp, pns1->nodes[0]); // the index expression
+                if (assign_kind == ASSIGN_AUG_LOAD) {
+                    EMIT(dup_top_two); // keep a copy of obj and index for the later ASSIGN_AUG_STORE
+                    EMIT(binary_op, RT_BINARY_OP_SUBSCR);
+                } else {
+                    EMIT(store_subscr);
+                }
+            }
+        } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_period) { // target is an attribute: obj.name
+            assert(PY_PARSE_NODE_IS_ID(pns1->nodes[0]));
+            if (assign_kind == ASSIGN_AUG_LOAD) {
+                EMIT(dup_top); // keep a copy of obj for the later ASSIGN_AUG_STORE
+                EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0]));
+            } else {
+                if (assign_kind == ASSIGN_AUG_STORE) {
+                    EMIT(rot_two); // swap the top two stack entries before the store
+                }
+                EMIT(store_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0]));
+            }
+        } else {
+            // shouldn't happen: the final trailer is not a call, subscript or attribute
+            assert(0);
+        }
+    } else {
+        // shouldn't happen: a PN_power target without a struct trailer
+        assert(0);
+    }
+
+    if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { // nodes[2] non-null is rejected as a target; presumably it holds the ** exponent part -- confirm against the grammar
+        // SyntaxError, cannot assign
+        assert(0);
+    }
+}
+
+// Unpacks the sequence on top of the stack into the n target nodes.
+// At most one target may be a starred expression: with a star at index i,
+// unpack_ex is emitted with i targets before it and n - i - 1 after it;
+// with no star, unpack_sequence is emitted for n items.  Every target is then
+// assigned in order with a plain store.
+void c_assign_tuple(compiler_t *comp, int n, py_parse_node_t *nodes) {
+    assert(n >= 0);
+
+    // find the starred target, if any, and emit the matching unpack
+    int star_pos = -1;
+    for (int idx = 0; idx < n; idx++) {
+        if (!PY_PARSE_NODE_IS_STRUCT_KIND(nodes[idx], PN_star_expr)) {
+            continue;
+        }
+        if (star_pos >= 0) {
+            printf("SyntaxError: two starred expressions in assignment\n");
+            return;
+        }
+        EMIT(unpack_ex, idx, n - idx - 1);
+        star_pos = idx;
+    }
+    if (star_pos < 0) {
+        EMIT(unpack_sequence, n);
+    }
+
+    // store into each target; for the starred one, the target is the
+    // expression underneath the star
+    for (int idx = 0; idx < n; idx++) {
+        py_parse_node_t target = nodes[idx];
+        if (idx == star_pos) {
+            target = ((py_parse_node_struct_t*)target)->nodes[0];
+        }
+        c_assign(comp, target, ASSIGN_STORE);
+    }
+}
+
+// Assigns the value on top of the stack to the target expression pn.
+// assign_kind selects the operation: ASSIGN_STORE is a plain store, while
+// ASSIGN_AUG_LOAD and ASSIGN_AUG_STORE are the load and store halves of an
+// augmented assignment.  Bare tuple targets (testlist/exprlist) and bracketed
+// targets are only accepted for ASSIGN_STORE.  Invalid targets are reported by
+// printing a "SyntaxError: ..." line; unsupported ones hit assert(0).
+void c_assign(compiler_t *comp, py_parse_node_t pn, assign_kind_t assign_kind) {
+    tail_recursion:
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        assert(0);
+    } else if (PY_PARSE_NODE_IS_LEAF(pn)) {
+        if (PY_PARSE_NODE_IS_ID(pn)) {
+            // target is a plain name
+            int arg = PY_PARSE_NODE_LEAF_ARG(pn);
+            switch (assign_kind) {
+                case ASSIGN_STORE:
+                case ASSIGN_AUG_STORE:
+                    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, arg);
+                    break;
+                case ASSIGN_AUG_LOAD:
+                    emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, arg);
+                    break;
+            }
+        } else {
+            printf("SyntaxError: can't assign to literal\n");
+            return;
+        }
+    } else {
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+        switch (PY_PARSE_NODE_STRUCT_KIND(pns)) {
+            case PN_power:
+                // lhs is an index or attribute
+                c_assign_power(comp, pns, assign_kind);
+                break;
+
+            case PN_testlist_star_expr:
+            case PN_exprlist:
+                // lhs is a tuple
+                if (assign_kind != ASSIGN_STORE) {
+                    goto bad_aug;
+                }
+                c_assign_tuple(comp, PY_PARSE_NODE_STRUCT_NUM_NODES(pns), pns->nodes);
+                break;
+
+            case PN_atom_paren:
+                // lhs is something in parenthesis
+                if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) {
+                    // empty tuple
+                    printf("SyntaxError: can't assign to ()\n");
+                    return;
+                } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) {
+                    pns = (py_parse_node_struct_t*)pns->nodes[0];
+                    goto testlist_comp;
+                } else {
+                    // parenthesis around 1 item, is just that item
+                    pn = pns->nodes[0];
+                    goto tail_recursion;
+                }
+                break;
+
+            case PN_atom_bracket:
+                // lhs is something in brackets
+                if (assign_kind != ASSIGN_STORE) {
+                    goto bad_aug;
+                }
+                if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) {
+                    // empty list, assignment allowed
+                    c_assign_tuple(comp, 0, NULL);
+                } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) {
+                    pns = (py_parse_node_struct_t*)pns->nodes[0];
+                    goto testlist_comp;
+                } else {
+                    // brackets around 1 item
+                    c_assign_tuple(comp, 1, &pns->nodes[0]);
+                }
+                break;
+
+            default:
+                printf("unknown assign, %u\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns));
+                assert(0);
+        }
+        return;
+
+        testlist_comp:
+        // lhs is a sequence; pns is the PN_testlist_comp node, whose nodes[0]
+        // is the first item and whose nodes[1] describes what follows it
+        if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+            py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1];
+            if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3b) {
+                // sequence of one item, with trailing comma
+                assert(PY_PARSE_NODE_IS_NULL(pns2->nodes[0]));
+                c_assign_tuple(comp, 1, &pns->nodes[0]);
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3c) {
+                // sequence of many items
+                // TODO call c_assign_tuple instead
+                int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns2);
+                EMIT(unpack_sequence, 1 + n);
+                c_assign(comp, pns->nodes[0], ASSIGN_STORE);
+                for (int i = 0; i < n; i++) {
+                    c_assign(comp, pns2->nodes[i], ASSIGN_STORE);
+                }
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_comp_for) {
+                // lhs is a comprehension.  The kind of the second node (pns2)
+                // is what must be tested here: pns is always a
+                // PN_testlist_comp at this point, so testing pns could never
+                // match and the comprehension was treated as a 2-item tuple.
+                // TODO not implemented
+                assert(0);
+            } else {
+                // sequence with 2 items
+                goto sequence_with_2_items;
+            }
+        } else {
+            // sequence with 2 items
+            sequence_with_2_items:
+            c_assign_tuple(comp, 2, pns->nodes);
+        }
+        return;
+    }
+    return;
+
+    bad_aug:
+    printf("SyntaxError: illegal expression for augmented assignment\n");
+}
+
+// stuff for lambda and comprehensions and generators
+//
+// Emits the code that creates the function object for this_scope.  Unless the
+// code currently being compiled is at module scope, every free variable of
+// this_scope is first loaded as a closure cell; if there are any, the cells
+// are packed into a tuple and make_closure is emitted, otherwise make_function.
+void close_over_variables_etc(compiler_t *comp, scope_t *this_scope, int n_dict_params, int n_default_params) {
+    // load the closed over variables, if any
+    bool at_module_level = comp->scope_cur->kind == SCOPE_MODULE;
+    int n_closed_over = 0;
+    for (int idx = 0; !at_module_level && idx < this_scope->id_info_len; idx++) {
+        id_info_t *info = &this_scope->id_info[idx];
+        if (info->kind != ID_INFO_KIND_FREE) {
+            continue;
+        }
+        EMIT(load_closure, info->qstr);
+        n_closed_over += 1;
+    }
+
+    // nothing closed over: a plain function will do
+    if (n_closed_over == 0) {
+        EMIT(make_function, this_scope, n_dict_params, n_default_params);
+        return;
+    }
+
+    // bundle the cells into a tuple and make a closure from them
+    EMIT(build_tuple, n_closed_over);
+    EMIT(make_closure, this_scope, n_dict_params, n_default_params);
+}
+
+// Handles one parameter of a function definition during the current
+// comp->param_pass.  A bare star is remembered in comp->have_bare_star.
+// A named parameter with a default value is counted and, in the right pass,
+// has its default compiled:
+//   after a bare star  -> counted as a dict param; in pass 1 its name and
+//                         default value are emitted
+//   before a bare star -> counted as a default param; in pass 2 its default
+//                         value is emitted
+void compile_funcdef_param(compiler_t *comp, py_parse_node_t pn) {
+    assert(PY_PARSE_NODE_IS_STRUCT(pn));
+    py_parse_node_struct_t *param = (py_parse_node_struct_t*)pn;
+
+    if (PY_PARSE_NODE_STRUCT_KIND(param) == PN_typedargslist_star) {
+        if (PY_PARSE_NODE_IS_NULL(param->nodes[0])) {
+            // bare star
+            comp->have_bare_star = true;
+        }
+        return;
+    }
+    if (PY_PARSE_NODE_STRUCT_KIND(param) != PN_typedargslist_name) {
+        return;
+    }
+    if (PY_PARSE_NODE_IS_NULL(param->nodes[2])) {
+        // no default value, nothing to compile
+        return;
+    }
+
+    // this parameter has a default value
+    // in CPython, None (and True, False?) as default parameters are loaded with LOAD_NAME; don't understand why
+    if (comp->have_bare_star) {
+        comp->param_pass_num_dict_params += 1;
+        if (comp->param_pass == 1) {
+            EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(param->nodes[0]));
+            compile_node(comp, param->nodes[2]);
+        }
+    } else {
+        comp->param_pass_num_default_params += 1;
+        if (comp->param_pass == 2) {
+            compile_node(comp, param->nodes[2]);
+        }
+    }
+}
+
+// Compiles the creation of a function from its definition node pns.
+// leaves function object on stack
+// returns function name
+qstr compile_funcdef_helper(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (comp->pass == PASS_1) {
+        // first pass only: create the scope for this function and stash it in
+        // the parse node so that every later pass can find it again
+        pns->nodes[4] = (py_parse_node_t)scope_new_and_link(comp, SCOPE_FUNCTION, (py_parse_node_t)pns);
+    }
+
+    // save variables (probably don't need to do this, since we can't have nested definitions..?)
+    bool saved_have_bare_star = comp->have_bare_star;
+    int saved_param_pass = comp->param_pass;
+    int saved_num_dict_params = comp->param_pass_num_dict_params;
+    int saved_num_default_params = comp->param_pass_num_default_params;
+
+    // compile default parameters, walking the parameter list twice:
+    //   pass 1 does any default parameters after bare star
+    //   pass 2 does any default parameters before bare star
+    // the counters are reset before each walk, so they end up holding the
+    // totals counted by the second one
+    for (int param_pass = 1; param_pass <= 2; param_pass++) {
+        comp->have_bare_star = false;
+        comp->param_pass = param_pass;
+        comp->param_pass_num_dict_params = 0;
+        comp->param_pass_num_default_params = 0;
+        apply_to_single_or_list(comp, pns->nodes[1], PN_typedargslist, compile_funcdef_param);
+    }
+
+    // make the function, using the scope stored in the parse node
+    scope_t *func_scope = (scope_t*)pns->nodes[4];
+    close_over_variables_etc(comp, func_scope, comp->param_pass_num_dict_params, comp->param_pass_num_default_params);
+
+    // restore variables
+    comp->have_bare_star = saved_have_bare_star;
+    comp->param_pass = saved_param_pass;
+    comp->param_pass_num_dict_params = saved_num_dict_params;
+    comp->param_pass_num_default_params = saved_num_default_params;
+
+    // the 'f' in "def f(...):"
+    return func_scope->simple_name;
+}
+
+// Compiles the creation of a class from its definition node pns.
+// leaves class object on stack
+// returns class name
+qstr compile_classdef_helper(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (comp->pass == PASS_1) {
+        // first pass only: create the scope for this class and stash it in
+        // the parse node so that every later pass can find it again
+        pns->nodes[3] = (py_parse_node_t)scope_new_and_link(comp, SCOPE_CLASS, (py_parse_node_t)pns);
+    }
+
+    EMIT(load_build_class);
+
+    // make the function for the class body, then load the class name
+    scope_t *class_scope = (scope_t*)pns->nodes[3];
+    close_over_variables_etc(comp, class_scope, 0, 0);
+    EMIT(load_const_id, class_scope->simple_name);
+
+    // the call always gets the body function and the name; nodes[1] has the
+    // parent classes, if any, and each of them is one more argument
+    // TODO what if we have, eg, *a or **a in the parent list?
+    int n_call_args = 2;
+    py_parse_node_t parents = pns->nodes[1];
+    if (!PY_PARSE_NODE_IS_NULL(parents)) {
+        compile_node(comp, parents);
+        n_call_args += list_len(parents, PN_arglist);
+    }
+    EMIT(call_function, n_call_args, 0, false, false);
+
+    // the 'C' in "class C(...):"
+    return class_scope->simple_name;
+}
+
+// Compiles a decorated function or class definition: loads every decorator,
+// creates the function/class object, applies the decorators to it with one
+// call each, and stores the result under the function/class name.
+void compile_decorated(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // nodes[0] is either a single decorator or a PN_decorators list of them
+    py_parse_node_t *decorators;
+    int n_decorators = list_get(&pns->nodes[0], PN_decorators, &decorators);
+
+    // load each decorator
+    for (int d = 0; d < n_decorators; d++) {
+        assert(PY_PARSE_NODE_IS_STRUCT_KIND(decorators[d], PN_decorator)); // should be
+        py_parse_node_struct_t *decorator = (py_parse_node_struct_t*)decorators[d];
+
+        // the decorator's name is one identifier or a dotted name a.b.c:
+        // load the first part, then fetch the rest as attributes
+        py_parse_node_t *name_parts;
+        int n_name_parts = list_get(&decorator->nodes[0], PN_dotted_name, &name_parts);
+        compile_node(comp, name_parts[0]);
+        for (int part = 1; part < n_name_parts; part++) {
+            EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(name_parts[part]));
+        }
+
+        py_parse_node_t call_args = decorator->nodes[1];
+        if (!PY_PARSE_NODE_IS_NULL(call_args)) {
+            // first call the function with these arguments
+            compile_node(comp, call_args);
+        }
+    }
+
+    // compile the body (funcdef or classdef) and get its name
+    py_parse_node_struct_t *definition = (py_parse_node_struct_t*)pns->nodes[1];
+    qstr body_name = 0;
+    if (PY_PARSE_NODE_STRUCT_KIND(definition) == PN_funcdef) {
+        body_name = compile_funcdef_helper(comp, definition);
+    } else if (PY_PARSE_NODE_STRUCT_KIND(definition) == PN_classdef) {
+        body_name = compile_classdef_helper(comp, definition);
+    } else {
+        // shouldn't happen
+        assert(0);
+    }
+
+    // call each decorator: one single-argument call per decorator loaded above
+    for (int d = 0; d < n_decorators; d++) {
+        EMIT(call_function, 1, 0, false, false);
+    }
+
+    // store func/class object into name
+    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, body_name);
+}
+
+void compile_funcdef(compiler_t *comp, py_parse_node_struct_t *pns) { // compile a function definition: create the function object, then bind it to its name
+    qstr fname = compile_funcdef_helper(comp, pns); // leaves the function object on the stack and returns its name
+    // store function object into function name
+    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, fname);
+}
+
+// Emits the code to delete one target of a del statement.
+// Supported targets are a plain name, a PN_power node ending in a subscript or
+// attribute trailer, and a parenthesised target or sequence of targets (each
+// item is deleted in turn by recursion).  Anything else hits assert(0).
+void c_del_stmt(compiler_t *comp, py_parse_node_t pn) {
+    if (PY_PARSE_NODE_IS_ID(pn)) {
+        emit_common_delete_id(comp->pass, comp->scope_cur, comp->emit, PY_PARSE_NODE_LEAF_ARG(pn));
+    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_power)) {
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+
+        compile_node(comp, pns->nodes[0]); // base of the power node
+
+        if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+            py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1];
+            if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_power_trailers) {
+                // several trailers: all but the last are ordinary loads, the
+                // last one names the thing to delete
+                int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1);
+                for (int i = 0; i < n - 1; i++) {
+                    compile_node(comp, pns1->nodes[i]);
+                }
+                assert(PY_PARSE_NODE_IS_STRUCT(pns1->nodes[n - 1]));
+                pns1 = (py_parse_node_struct_t*)pns1->nodes[n - 1];
+            }
+            if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_paren) {
+                // SyntaxError: can't delete a function call
+                assert(0);
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_bracket) {
+                compile_node(comp, pns1->nodes[0]);
+                EMIT(delete_subscr);
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_period) {
+                assert(PY_PARSE_NODE_IS_ID(pns1->nodes[0]));
+                EMIT(delete_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0]));
+            } else {
+                // shouldn't happen
+                assert(0);
+            }
+        } else {
+            // shouldn't happen
+            assert(0);
+        }
+
+        if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) {
+            // SyntaxError, cannot delete
+            assert(0);
+        }
+    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_atom_paren)) {
+        pn = ((py_parse_node_struct_t*)pn)->nodes[0];
+        if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_testlist_comp)) {
+            py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+            // TODO perhaps factorise testlist_comp code with other uses of PN_testlist_comp
+
+            if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+                py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1];
+                if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3b) {
+                    // sequence of one item, with trailing comma
+                    assert(PY_PARSE_NODE_IS_NULL(pns1->nodes[0]));
+                    c_del_stmt(comp, pns->nodes[0]);
+                } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3c) {
+                    // sequence of many items
+                    int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1);
+                    c_del_stmt(comp, pns->nodes[0]);
+                    for (int i = 0; i < n; i++) {
+                        c_del_stmt(comp, pns1->nodes[i]);
+                    }
+                } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_comp_for) {
+                    // The kind of the second node (pns1) is what must be
+                    // tested here: pns is always a PN_testlist_comp at this
+                    // point, so testing pns could never match and a
+                    // comprehension was deleted as if it were a 2-item tuple.
+                    // TODO not implemented; can't del comprehension?
+                    assert(0);
+                } else {
+                    // sequence with 2 items
+                    goto sequence_with_2_items;
+                }
+            } else {
+                // sequence with 2 items
+                sequence_with_2_items:
+                c_del_stmt(comp, pns->nodes[0]);
+                c_del_stmt(comp, pns->nodes[1]);
+            }
+        } else {
+            // parentheses around a single item: delete that item
+            c_del_stmt(comp, pn);
+        }
+    } else {
+        // not implemented
+        assert(0);
+    }
+}
+
+void compile_del_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { // compile a del statement: nodes[0] is a single target or a PN_exprlist of targets
+    apply_to_single_or_list(comp, pns->nodes[0], PN_exprlist, c_del_stmt); // delete each target in order
+}
+
+void compile_break_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { // compile a break statement as a break_loop to the compiler's current break label
+    if (comp->break_label == 0) { // a break label of 0 is treated as "there is nothing to break out of"
+        printf("ERROR: cannot break from here\n");
+    } // NOTE: only a diagnostic is printed; the break_loop below is emitted regardless
+    EMIT(break_loop, comp->break_label);
+}
+
+// Compiles a continue statement.  A continue label of 0 is reported as an
+// error (the jump is still emitted).  Outside of any exception handling nesting
+// a plain jump to the continue label is enough; inside it, continue_loop is
+// emitted instead.
+void compile_continue_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (comp->continue_label == 0) {
+        printf("ERROR: cannot continue from here\n");
+    }
+    if (!(comp->except_nest_level > 0)) {
+        EMIT(jump, comp->continue_label);
+        return;
+    }
+    EMIT(continue_loop, comp->continue_label);
+}
+
+// Compiles a return statement.  A bare return returns None.  Returning an
+// if-expression ("return a if c else b") is compiled as two separate returns,
+// one per branch, to match a CPython optimisation.
+void compile_return_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t value = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_NULL(value)) {
+        EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+    } else if (!PY_PARSE_NODE_IS_STRUCT_KIND(value, PN_test_if_expr)) {
+        compile_node(comp, value);
+    } else {
+        // if_expr->nodes[0] is the success value; if_else holds the condition
+        // (nodes[0]) and the failure value (nodes[1])
+        py_parse_node_struct_t *if_expr = (py_parse_node_struct_t*)value;
+        py_parse_node_struct_t *if_else = (py_parse_node_struct_t*)if_expr->nodes[1];
+
+        int label_fail = EMIT(label_new);
+        c_if_cond(comp, if_else->nodes[0], false, label_fail); // condition
+        compile_node(comp, if_expr->nodes[0]); // success value
+        EMIT(return_value);
+        EMIT(label_assign, label_fail);
+        compile_node(comp, if_else->nodes[1]); // failure value, returned below
+    }
+
+    EMIT(return_value);
+}
+
+void compile_yield_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { // compile a yield used as a statement
+    compile_node(comp, pns->nodes[0]); // compile the child node (presumably the yield expression itself)
+    EMIT(pop_top); // in statement position the value it leaves on the stack is unused, so drop it
+}
+
+// Compiles a raise statement.  The operands, if any, are pushed first and
+// raise_varargs is then emitted with their count:
+//   raise           -> 0
+//   raise x         -> 1
+//   raise x from y  -> 2
+void compile_raise_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t operand = pns->nodes[0];
+    int n_operands;
+
+    if (PY_PARSE_NODE_IS_NULL(operand)) {
+        // raise
+        n_operands = 0;
+    } else if (PY_PARSE_NODE_IS_STRUCT_KIND(operand, PN_raise_stmt_arg)) {
+        // raise x from y
+        py_parse_node_struct_t *x_from_y = (py_parse_node_struct_t*)operand;
+        compile_node(comp, x_from_y->nodes[0]);
+        compile_node(comp, x_from_y->nodes[1]);
+        n_operands = 2;
+    } else {
+        // raise x
+        compile_node(comp, operand);
+        n_operands = 1;
+    }
+
+    EMIT(raise_varargs, n_operands);
+}
+
+// Emits the import of one module name and reports the names involved:
+// q1 holds the base (or the alias, for a name of the form "x as y"), q2 the full name
+// eg   a -> q1=q2=a
+//      a.b.c -> q1=a, q2=a.b.c
+void do_import_name(compiler_t *comp, py_parse_node_t pn, qstr *q1, qstr *q2) {
+    // a name of the form x as y; unwrap it, keeping y as the base name
+    bool has_alias = PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_dotted_as_name);
+    if (has_alias) {
+        py_parse_node_struct_t *as_node = (py_parse_node_struct_t*)pn;
+        *q1 = PY_PARSE_NODE_LEAF_ARG(as_node->nodes[1]);
+        pn = as_node->nodes[0];
+    }
+
+    if (PY_PARSE_NODE_IS_ID(pn)) {
+        // just a simple name
+        *q2 = PY_PARSE_NODE_LEAF_ARG(pn);
+        if (!has_alias) {
+            *q1 = *q2;
+        }
+        EMIT(import_name, *q2);
+        return;
+    }
+
+    if (!PY_PARSE_NODE_IS_STRUCT(pn) || PY_PARSE_NODE_STRUCT_KIND((py_parse_node_struct_t*)pn) != PN_dotted_name) {
+        // TODO not implemented
+        assert(0);
+        return;
+    }
+
+    // a name of the form a.b.c
+    py_parse_node_struct_t *dotted = (py_parse_node_struct_t*)pn;
+    if (!has_alias) {
+        *q1 = PY_PARSE_NODE_LEAF_ARG(dotted->nodes[0]);
+    }
+
+    // work out the length of the joined name: all the parts plus one '.'
+    // between each neighbouring pair
+    int n_parts = PY_PARSE_NODE_STRUCT_NUM_NODES(dotted);
+    int full_len = n_parts - 1;
+    for (int part = 0; part < n_parts; part++) {
+        full_len += strlen(qstr_str(PY_PARSE_NODE_LEAF_ARG(dotted->nodes[part])));
+    }
+
+    // join the parts with dots into a freshly allocated string, which is then
+    // handed over to qstr_from_str_take
+    char *full_name = m_new(char, full_len + 1);
+    full_name[0] = 0;
+    for (int part = 0; part < n_parts; part++) {
+        if (part > 0) {
+            strcat(full_name, ".");
+        }
+        strcat(full_name, qstr_str(PY_PARSE_NODE_LEAF_ARG(dotted->nodes[part])));
+    }
+    *q2 = qstr_from_str_take(full_name);
+    EMIT(import_name, *q2);
+
+    if (has_alias) {
+        // with an alias, load each later part of the dotted name as an
+        // attribute of the imported value
+        for (int part = 1; part < n_parts; part++) {
+            EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(dotted->nodes[part]));
+        }
+    }
+}
+
+void compile_dotted_as_name(compiler_t *comp, py_parse_node_t pn) { // compile the import of one module name and bind the result to a name
+    EMIT(load_const_small_int, 0); // ?? (NOTE(review): presumably the import level, 0 = absolute, as for CPython's IMPORT_NAME -- confirm)
+    EMIT(load_const_tok, PY_TOKEN_KW_NONE); // NOTE(review): presumably the from-list, None for a plain import -- confirm
+    qstr q1, q2;
+    do_import_name(comp, pn, &q1, &q2);
+    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, q1); // q1 is the name to bind: the base of the module name, or the alias given with "as"
+}
+
+void compile_import_name(compiler_t *comp, py_parse_node_struct_t *pns) { // compile an import statement: nodes[0] is a single module name or a PN_dotted_as_names list of them
+    apply_to_single_or_list(comp, pns->nodes[0], PN_dotted_as_names, compile_dotted_as_name); // import each name in order
+}
+
+// Compiles "from <module> import ..." where nodes[0] is the module name and
+// nodes[1] is either the '*' token or the imported names.
+// In both forms a small int 0 and a verbatim tuple of the imported names are
+// loaded before the module is imported.  For '*' this is followed by
+// import_star; otherwise each name is fetched with import_from and stored
+// under its own name or its "as" alias, and the module is finally popped.
+void compile_import_from(compiler_t *comp, py_parse_node_struct_t *pns) {
+    bool is_star = PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_STAR);
+
+    // the names to import: a single PN_import_as_name or a list of them
+    py_parse_node_t *names = NULL;
+    int n_names = 0;
+    if (!is_star) {
+        n_names = list_get(&pns->nodes[1], PN_import_as_names, &names);
+    }
+
+    EMIT(load_const_small_int, 0); // what's this for??
+
+    // the tuple of names being imported, spelt out as text
+    EMIT(load_const_verbatim_start);
+    if (is_star) {
+        EMIT(load_const_verbatim_str, "('*',)");
+    } else {
+        EMIT(load_const_verbatim_str, "(");
+        for (int idx = 0; idx < n_names; idx++) {
+            assert(PY_PARSE_NODE_IS_STRUCT_KIND(names[idx], PN_import_as_name));
+            py_parse_node_struct_t *as_name = (py_parse_node_struct_t*)names[idx];
+            qstr name = PY_PARSE_NODE_LEAF_ARG(as_name->nodes[0]); // should be id
+            if (idx > 0) {
+                EMIT(load_const_verbatim_str, ", ");
+            }
+            EMIT(load_const_verbatim_str, "'");
+            EMIT(load_const_verbatim_str, qstr_str(name));
+            EMIT(load_const_verbatim_str, "'");
+        }
+        if (n_names == 1) {
+            // a 1-tuple needs its trailing comma
+            EMIT(load_const_verbatim_str, ",");
+        }
+        EMIT(load_const_verbatim_str, ")");
+    }
+    EMIT(load_const_verbatim_end);
+
+    // import the module itself; the names it reports are not needed here
+    qstr unused_base, unused_full;
+    do_import_name(comp, pns->nodes[0], &unused_base, &unused_full);
+
+    if (is_star) {
+        EMIT(import_star);
+        return;
+    }
+
+    // pull each name out of the module and bind it
+    for (int idx = 0; idx < n_names; idx++) {
+        assert(PY_PARSE_NODE_IS_STRUCT_KIND(names[idx], PN_import_as_name));
+        py_parse_node_struct_t *as_name = (py_parse_node_struct_t*)names[idx];
+        qstr name = PY_PARSE_NODE_LEAF_ARG(as_name->nodes[0]); // should be id
+        EMIT(import_from, name);
+        if (PY_PARSE_NODE_IS_NULL(as_name->nodes[1])) {
+            // no alias: bind under the imported name
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, name);
+        } else {
+            // bind under the alias
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, PY_PARSE_NODE_LEAF_ARG(as_name->nodes[1]));
+        }
+    }
+    EMIT(pop_top);
+}
+
+// Declare every identifier named by a global statement as global in the
+// current scope. pns->nodes[0] is either a single leaf or a struct node whose
+// children are the leaves.
+void compile_global_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // start by treating the single-name case as an array of one
+    py_parse_node_t *pn_ids = &pns->nodes[0];
+    int num_ids = 1;
+    if (!PY_PARSE_NODE_IS_LEAF(pns->nodes[0])) {
+        // several names: walk the children of the list node instead
+        py_parse_node_struct_t *pns_list = (py_parse_node_struct_t*)pns->nodes[0];
+        pn_ids = &pns_list->nodes[0];
+        num_ids = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_list);
+    }
+    for (int k = 0; k < num_ids; k++) {
+        emit_common_declare_global(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pn_ids[k]));
+    }
+}
+
+// Declare every identifier named by a nonlocal statement as nonlocal in the
+// current scope. pns->nodes[0] is either a single leaf or a struct node whose
+// children are the leaves.
+void compile_nonlocal_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // start by treating the single-name case as an array of one
+    py_parse_node_t *pn_ids = &pns->nodes[0];
+    int num_ids = 1;
+    if (!PY_PARSE_NODE_IS_LEAF(pns->nodes[0])) {
+        // several names: walk the children of the list node instead
+        py_parse_node_struct_t *pns_list = (py_parse_node_struct_t*)pns->nodes[0];
+        pn_ids = &pns_list->nodes[0];
+        num_ids = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_list);
+    }
+    for (int k = 0; k < num_ids; k++) {
+        emit_common_declare_nonlocal(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pn_ids[k]));
+    }
+}
+
+// Compile an assert statement: pns->nodes[0] is the condition and
+// pns->nodes[1] the optional message. When the condition holds control jumps
+// over the code that loads and raises the assertion error.
+void compile_assert_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_cond = pns->nodes[0];
+    py_parse_node_t pn_message = pns->nodes[1];
+
+    int l_passed = EMIT(label_new);
+    c_if_cond(comp, pn_cond, true, l_passed);
+
+    // condition failed: load the assertion error object
+    emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr_assertion_error);
+    if (!PY_PARSE_NODE_IS_NULL(pn_message)) {
+        // call it with the assertion message as its single argument
+        compile_node(comp, pn_message);
+        EMIT(call_function, 1, 0, false, false);
+    }
+    EMIT(raise_varargs, 1);
+
+    EMIT(label_assign, l_passed);
+}
+
+// Compile one "condition: block" arm of an if statement. When the condition
+// is false control jumps past the block; otherwise the block runs and then
+// jumps to l_end.
+static void compile_if_stmt_arm(compiler_t *comp, py_parse_node_t pn_cond, py_parse_node_t pn_block, int l_end) {
+    int l_next_arm = EMIT(label_new);
+    c_if_cond(comp, pn_cond, false, l_next_arm);
+    compile_node(comp, pn_block);
+    // no jump is emitted after a block that ended in a return
+    // (a simple optimisation to align with CPython)
+    if (!emit_last_emit_was_return_value(comp->emit)) {
+        EMIT(jump, l_end);
+    }
+    EMIT(label_assign, l_next_arm);
+}
+
+// Compile an if statement.
+//   pns->nodes[0], pns->nodes[1]: condition and block of the "if" arm
+//   pns->nodes[2]: null, a single elif node, or a PN_if_stmt_elif_list
+//   pns->nodes[3]: the else block (can be null)
+void compile_if_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // TODO proper and/or short circuiting
+
+    int l_end = EMIT(label_new);
+
+    // the leading "if" arm; the jump over the elif/else blocks is emitted even
+    // when none exist, because skipping it doesn't align with CPython
+    compile_if_stmt_arm(comp, pns->nodes[0], pns->nodes[1], l_end);
+
+    if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) {
+        // a single elif block is handled as a list of one
+        py_parse_node_t *pn_arms = &pns->nodes[2];
+        int num_arms = 1;
+        py_parse_node_struct_t *pns_elif = (py_parse_node_struct_t*)pns->nodes[2];
+        if (PY_PARSE_NODE_STRUCT_KIND(pns_elif) == PN_if_stmt_elif_list) {
+            // multiple elif blocks
+            pn_arms = &pns_elif->nodes[0];
+            num_arms = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_elif);
+        }
+        for (int k = 0; k < num_arms; k++) {
+            py_parse_node_struct_t *pns_arm = (py_parse_node_struct_t*)pn_arms[k];
+            compile_if_stmt_arm(comp, pns_arm->nodes[0], pns_arm->nodes[1], l_end);
+        }
+    }
+
+    // compile else block
+    compile_node(comp, pns->nodes[3]); // can be null
+
+    EMIT(label_assign, l_end);
+}
+
+// Compile a while statement: pns->nodes[0] is the condition, pns->nodes[1]
+// the body and pns->nodes[2] the else block.
+void compile_while_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // remember the enclosing loop's jump targets so they can be restored
+    const int saved_break = comp->break_label;
+    const int saved_continue = comp->continue_label;
+
+    // labels are allocated in this order: done, end, break, continue
+    int l_done = EMIT(label_new);
+    int l_end = EMIT(label_new);
+    int l_break = EMIT(label_new);
+    int l_continue = EMIT(label_new);
+
+    comp->break_label = l_break;
+    comp->continue_label = l_continue;
+
+    EMIT(setup_loop, l_end);
+    EMIT(label_assign, l_continue);
+    c_if_cond(comp, pns->nodes[0], false, l_done); // condition
+    compile_node(comp, pns->nodes[1]); // body
+    if (!emit_last_emit_was_return_value(comp->emit)) {
+        // loop back to re-test the condition
+        EMIT(jump, l_continue);
+    }
+    EMIT(label_assign, l_done);
+
+    // break/continue apply to outer loop (if any) in the else block
+    comp->break_label = saved_break;
+    comp->continue_label = saved_continue;
+
+    // CPython does not emit POP_BLOCK if the condition was a constant; don't understand why
+    // this is a small hack to agree with CPython
+    if (!node_is_const_true(pns->nodes[0])) {
+        EMIT(pop_block);
+    }
+
+    compile_node(comp, pns->nodes[2]); // else
+
+    EMIT(label_assign, l_break);
+    EMIT(label_assign, l_end);
+}
+
+// Compile a for statement: pns->nodes[0] is the assignment target,
+// pns->nodes[1] the expression iterated over, pns->nodes[2] the body and
+// pns->nodes[3] the else block.
+void compile_for_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // remember the enclosing loop's jump targets so they can be restored
+    const int saved_break = comp->break_label;
+    const int saved_continue = comp->continue_label;
+
+    // labels are allocated in this order: loop head, exhausted, end, break
+    int l_head = EMIT(label_new);
+    int l_exhausted = EMIT(label_new);
+    int l_end = EMIT(label_new);
+    int l_break = EMIT(label_new);
+
+    // a "continue" goes back to the for_iter at the loop head
+    comp->continue_label = l_head;
+    comp->break_label = l_break;
+
+    EMIT(setup_loop, l_end);
+    compile_node(comp, pns->nodes[1]); // iterator
+    EMIT(get_iter);
+    EMIT(label_assign, l_head);
+    EMIT(for_iter, l_exhausted);
+    c_assign(comp, pns->nodes[0], ASSIGN_STORE); // variable
+    compile_node(comp, pns->nodes[2]); // body
+    if (!emit_last_emit_was_return_value(comp->emit)) {
+        EMIT(jump, l_head);
+    }
+    EMIT(label_assign, l_exhausted);
+    EMIT(for_iter_end);
+
+    // break/continue apply to outer loop (if any) in the else block
+    comp->break_label = saved_break;
+    comp->continue_label = saved_continue;
+
+    EMIT(pop_block);
+
+    compile_node(comp, pns->nodes[3]); // else (not tested)
+
+    EMIT(label_assign, l_break);
+    EMIT(label_assign, l_end);
+}
+
+// Compile a try statement with one or more except handlers.
+//   pn_body:    the try block
+//   n_except:   number of entries in pn_excepts
+//   pn_excepts: the handlers, each a PN_try_stmt_except node whose nodes[0] is
+//               the exception expression (null for a bare "except:", or a
+//               PN_try_stmt_as_name node for "except E as name") and whose
+//               nodes[1] is the handler body
+//   pn_else:    the else block, can be null
+// A bare "except:" that is not the last handler is reported with a printed
+// SyntaxError message and compilation of this statement is abandoned.
+void compile_try_except(compiler_t *comp, py_parse_node_t pn_body, int n_except, py_parse_node_t *pn_excepts, py_parse_node_t pn_else) {
+    // this function is a bit of a hack at the moment
+    // don't understand how the stack works with exceptions, so we force it to return to the correct value
+
+    // setup code
+    int stack_size = EMIT(get_stack_size);
+    int l1 = EMIT(label_new);
+    int success_label = EMIT(label_new);
+    comp->except_nest_level += 1; // for correct handling of continue
+    EMIT(setup_except, l1);
+    compile_node(comp, pn_body); // body
+    EMIT(pop_block);
+    EMIT(jump, success_label);
+    EMIT(label_assign, l1);
+    int l2 = EMIT(label_new);
+
+    for (int i = 0; i < n_except; i++) {
+        assert(PY_PARSE_NODE_IS_STRUCT_KIND(pn_excepts[i], PN_try_stmt_except)); // should be
+        py_parse_node_struct_t *pns_except = (py_parse_node_struct_t*)pn_excepts[i];
+
+        // 0 means the handler does not bind the exception to a name
+        qstr qstr_exception_local = 0;
+        int end_finally_label = EMIT(label_new);
+
+        if (PY_PARSE_NODE_IS_NULL(pns_except->nodes[0])) {
+            // this is a catch all exception handler
+            if (i + 1 != n_except) {
+                printf("SyntaxError: default 'except:' must be last\n");
+                // undo the increment made above, so the nest level does not
+                // stay raised for the code compiled after this statement
+                comp->except_nest_level -= 1;
+                return;
+            }
+        } else {
+            // this exception handler requires a match to a certain type of exception
+            py_parse_node_t pns_exception_expr = pns_except->nodes[0];
+            if (PY_PARSE_NODE_IS_STRUCT(pns_exception_expr)) {
+                py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pns_exception_expr;
+                if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_try_stmt_as_name) {
+                    // handler binds the exception to a local
+                    pns_exception_expr = pns3->nodes[0];
+                    qstr_exception_local = PY_PARSE_NODE_LEAF_ARG(pns3->nodes[1]);
+                }
+            }
+            // compare a copy of the top of stack against the handler's
+            // expression; skip this handler if it does not match
+            EMIT(dup_top);
+            compile_node(comp, pns_exception_expr);
+            EMIT(compare_op, RT_COMPARE_OP_EXCEPTION_MATCH);
+            EMIT(pop_jump_if_false, end_finally_label);
+        }
+
+        EMIT(pop_top);
+
+        // the middle of the three popped items is either discarded or stored
+        // into the handler's local name
+        if (qstr_exception_local == 0) {
+            EMIT(pop_top);
+        } else {
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local);
+        }
+
+        EMIT(pop_top);
+
+        // a handler with a bound name wraps its body in a finally block that
+        // resets and then deletes that name
+        int l3 = 0; // only assigned and used when the handler binds a name
+        if (qstr_exception_local != 0) {
+            l3 = EMIT(label_new);
+            EMIT(setup_finally, l3);
+        }
+        compile_node(comp, pns_except->nodes[1]);
+        if (qstr_exception_local != 0) {
+            EMIT(pop_block);
+        }
+        EMIT(pop_except);
+        if (qstr_exception_local != 0) {
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            EMIT(label_assign, l3);
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local);
+            emit_common_delete_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local);
+            EMIT(end_finally);
+        }
+        EMIT(jump, l2);
+        EMIT(label_assign, end_finally_label);
+    }
+
+    EMIT(end_finally);
+    EMIT(label_assign, success_label);
+    comp->except_nest_level -= 1;
+    compile_node(comp, pn_else); // else block, can be null
+    EMIT(label_assign, l2);
+    EMIT(set_stack_size, stack_size);
+}
+
+// Compile a try statement that has a finally block.
+//   pn_body:              the try block
+//   n_except / pn_except: the except handlers (n_except may be 0)
+//   pn_else:              the else block; must be null when n_except is 0
+//   pn_finally:           the finally block
+void compile_try_finally(compiler_t *comp, py_parse_node_t pn_body, int n_except, py_parse_node_t *pn_except, py_parse_node_t pn_else, py_parse_node_t pn_finally) {
+    // don't understand how the stack works with exceptions, so we force it to return to the correct value
+    int saved_stack_size = EMIT(get_stack_size);
+
+    int l_finally = EMIT(label_new);
+    EMIT(setup_finally, l_finally);
+
+    // the protected region: the whole try-except when there are handlers,
+    // otherwise just the body
+    if (n_except != 0) {
+        compile_try_except(comp, pn_body, n_except, pn_except, pn_else);
+    } else {
+        assert(PY_PARSE_NODE_IS_NULL(pn_else));
+        compile_node(comp, pn_body);
+    }
+
+    EMIT(pop_block);
+    EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+    EMIT(label_assign, l_finally);
+    compile_node(comp, pn_finally);
+    EMIT(end_finally);
+
+    EMIT(set_stack_size, saved_stack_size);
+}
+
+// Compile a try statement by dispatching on the kind of pns->nodes[1]:
+// a lone finally clause, except handlers followed by else and/or finally,
+// or only except handlers. pns->nodes[0] is the try block.
+void compile_try_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (!PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
+        // shouldn't happen
+        assert(0);
+        return;
+    }
+
+    py_parse_node_t pn_body = pns->nodes[0];
+    py_parse_node_struct_t *pns_rest = (py_parse_node_struct_t*)pns->nodes[1];
+    int rest_kind = PY_PARSE_NODE_STRUCT_KIND(pns_rest);
+
+    if (rest_kind == PN_try_stmt_finally) {
+        // just try-finally
+        compile_try_finally(comp, pn_body, 0, NULL, PY_PARSE_NODE_NULL, pns_rest->nodes[0]);
+        return;
+    }
+
+    py_parse_node_t *pn_excepts;
+    if (rest_kind == PN_try_stmt_except_and_more) {
+        // try-except and possibly else and/or finally
+        int n_except = list_get(&pns_rest->nodes[0], PN_try_stmt_except_list, &pn_excepts);
+        py_parse_node_t pn_else = pns_rest->nodes[1];
+        if (PY_PARSE_NODE_IS_NULL(pns_rest->nodes[2])) {
+            // no finally
+            compile_try_except(comp, pn_body, n_except, pn_excepts, pn_else);
+        } else {
+            // have finally
+            py_parse_node_struct_t *pns_finally = (py_parse_node_struct_t*)pns_rest->nodes[2];
+            compile_try_finally(comp, pn_body, n_except, pn_excepts, pn_else, pns_finally->nodes[0]);
+        }
+        return;
+    }
+
+    // just try-except
+    int n_except = list_get(&pns->nodes[1], PN_try_stmt_except_list, &pn_excepts);
+    compile_try_except(comp, pn_body, n_except, pn_excepts, PY_PARSE_NODE_NULL);
+}
+
+// Compile the with-items nodes[0..n-1] as nested with blocks around body.
+// Each call handles nodes[0] and recurses for the remaining items; once no
+// items are left the body itself is compiled.
+void compile_with_stmt_helper(compiler_t *comp, int n, py_parse_node_t *nodes, py_parse_node_t body) {
+    if (n == 0) {
+        // no more pre-bits, compile the body of the with
+        compile_node(comp, body);
+        return;
+    }
+
+    int l_cleanup = EMIT(label_new);
+
+    // work out whether this pre-bit is "a as b" or just an expression
+    bool has_target = PY_PARSE_NODE_IS_STRUCT_KIND(nodes[0], PN_with_item);
+    py_parse_node_t pn_expr = nodes[0];
+    py_parse_node_t pn_target = PY_PARSE_NODE_NULL;
+    if (has_target) {
+        py_parse_node_struct_t *pns_item = (py_parse_node_struct_t*)nodes[0];
+        pn_expr = pns_item->nodes[0];
+        pn_target = pns_item->nodes[1];
+    }
+
+    compile_node(comp, pn_expr);
+    EMIT(setup_with, l_cleanup);
+    if (has_target) {
+        // store the value left on the stack into "b"
+        c_assign(comp, pn_target, ASSIGN_STORE);
+    } else {
+        // nothing to bind it to, so discard it
+        EMIT(pop_top);
+    }
+
+    // compile additional pre-bits and the body
+    compile_with_stmt_helper(comp, n - 1, nodes + 1, body);
+
+    // finish this with block
+    EMIT(pop_block);
+    EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+    EMIT(label_assign, l_cleanup);
+    EMIT(with_cleanup);
+    EMIT(end_finally);
+}
+
+// Compile a with statement: pns->nodes[0] holds the with-items (the
+// "a as b, c as d, ..." bit) and pns->nodes[1] the body.
+void compile_with_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t *pn_items;
+    int num_items = list_get(&pns->nodes[0], PN_with_stmt_list, &pn_items);
+    assert(num_items > 0);
+
+    // each item becomes one nesting level around the body
+    py_parse_node_t pn_body = pns->nodes[1];
+    compile_with_stmt_helper(comp, num_items, pn_items, pn_body);
+}
+
+// Compile an expression statement. pns->nodes[0] is the (left-most)
+// expression; pns->nodes[1] is null for a bare expression, or one of
+// PN_expr_stmt_augassign, PN_expr_stmt_assign_list or PN_expr_stmt_assign.
+void compile_expr_stmt(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_lhs = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_NULL(pns->nodes[1])) {
+        // just an expression; do nothing with a lonely constant
+        if (!PY_PARSE_NODE_IS_LEAF(pn_lhs) || PY_PARSE_NODE_IS_ID(pn_lhs)) {
+            compile_node(comp, pn_lhs);
+            EMIT(pop_top); // discard last result since this is a statement and leaves nothing on the stack
+        }
+        return;
+    }
+
+    py_parse_node_struct_t *pns_rest = (py_parse_node_struct_t*)pns->nodes[1];
+    int kind = PY_PARSE_NODE_STRUCT_KIND(pns_rest);
+
+    if (kind == PN_expr_stmt_augassign) {
+        c_assign(comp, pn_lhs, ASSIGN_AUG_LOAD); // lhs load for aug assign
+        compile_node(comp, pns_rest->nodes[1]); // rhs
+        assert(PY_PARSE_NODE_IS_TOKEN(pns_rest->nodes[0]));
+        // note that we don't really need to implement separate inplace ops, just normal binary ops will suffice
+        switch (PY_PARSE_NODE_LEAF_ARG(pns_rest->nodes[0])) {
+            case PY_TOKEN_DEL_PIPE_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_OR); break;
+            case PY_TOKEN_DEL_CARET_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_XOR); break;
+            case PY_TOKEN_DEL_AMPERSAND_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_AND); break;
+            case PY_TOKEN_DEL_DBL_LESS_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_LSHIFT); break;
+            case PY_TOKEN_DEL_DBL_MORE_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_RSHIFT); break;
+            case PY_TOKEN_DEL_PLUS_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_ADD); break;
+            case PY_TOKEN_DEL_MINUS_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_SUBTRACT); break;
+            case PY_TOKEN_DEL_STAR_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_MULTIPLY); break;
+            case PY_TOKEN_DEL_DBL_SLASH_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_FLOOR_DIVIDE); break;
+            case PY_TOKEN_DEL_SLASH_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_TRUE_DIVIDE); break;
+            case PY_TOKEN_DEL_PERCENT_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_MODULO); break;
+            case PY_TOKEN_DEL_DBL_STAR_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_POWER); break;
+            default: assert(0); // shouldn't happen
+        }
+        c_assign(comp, pn_lhs, ASSIGN_AUG_STORE); // lhs store for aug assign
+        return;
+    }
+
+    if (kind == PN_expr_stmt_assign_list) {
+        // chained assignment: the last child holds the value, the children
+        // before it hold the remaining targets
+        int last = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_rest) - 1;
+        compile_node(comp, ((py_parse_node_struct_t*)pns_rest->nodes[last])->nodes[0]); // rhs
+        // following CPython, we store left-most first
+        if (last > 0) {
+            EMIT(dup_top);
+        }
+        c_assign(comp, pn_lhs, ASSIGN_STORE); // lhs store
+        for (int k = 0; k < last; k++) {
+            // every store except the final one works on a duplicate of the value
+            if (k + 1 < last) {
+                EMIT(dup_top);
+            }
+            c_assign(comp, ((py_parse_node_struct_t*)pns_rest->nodes[k])->nodes[0], ASSIGN_STORE); // middle store
+        }
+        return;
+    }
+
+    if (kind != PN_expr_stmt_assign) {
+        // shouldn't happen
+        assert(0);
+        return;
+    }
+
+    // a single assignment
+    py_parse_node_t pn_rhs = pns_rest->nodes[0];
+    bool both_tuples = PY_PARSE_NODE_IS_STRUCT_KIND(pn_rhs, PN_testlist_star_expr)
+        && PY_PARSE_NODE_IS_STRUCT_KIND(pn_lhs, PN_testlist_star_expr);
+    int rhs_len = 0;
+    int lhs_len = 0;
+    if (both_tuples) {
+        rhs_len = PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pn_rhs);
+        lhs_len = PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pn_lhs);
+    }
+
+    if (both_tuples && rhs_len == lhs_len && (rhs_len == 2 || rhs_len == 3)) {
+        // optimisation for a, b = c, d and a, b, c = d, e, f; to match CPython's optimisation
+        py_parse_node_struct_t *pns_values = (py_parse_node_struct_t*)pn_rhs;
+        py_parse_node_struct_t *pns_targets = (py_parse_node_struct_t*)pn_lhs;
+        for (int k = 0; k < rhs_len; k++) {
+            compile_node(comp, pns_values->nodes[k]); // rhs
+        }
+        // rotate the values so they can be stored left-most first
+        if (rhs_len == 3) {
+            EMIT(rot_three);
+        }
+        EMIT(rot_two);
+        for (int k = 0; k < lhs_len; k++) {
+            c_assign(comp, pns_targets->nodes[k], ASSIGN_STORE); // lhs store
+        }
+    } else {
+        compile_node(comp, pn_rhs); // rhs
+        c_assign(comp, pn_lhs, ASSIGN_STORE); // lhs store
+    }
+}
+
+// Compile all children of pns as operands, emitting binary_op after every
+// operand except the first, so the operation is applied left to right.
+void c_binary_op(compiler_t *comp, py_parse_node_struct_t *pns, rt_binary_op_t binary_op) {
+    const int num_operands = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]); // left-most operand
+    for (int k = 1; k < num_operands; k++) {
+        compile_node(comp, pns->nodes[k]);
+        EMIT(binary_op, binary_op);
+    }
+}
+
+// Compile a conditional expression. pns->nodes[0] is the value used when the
+// condition succeeds; pns->nodes[1] is a PN_test_if_else node holding the
+// condition (nodes[0]) and the value used when it fails (nodes[1]).
+void compile_test_if_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_test_if_else));
+    py_parse_node_struct_t *pns_if_else = (py_parse_node_struct_t*)pns->nodes[1];
+    py_parse_node_t pn_cond = pns_if_else->nodes[0];
+    py_parse_node_t pn_on_fail = pns_if_else->nodes[1];
+
+    int entry_stack_size = EMIT(get_stack_size);
+    int l_on_fail = EMIT(label_new);
+    int l_done = EMIT(label_new);
+
+    c_if_cond(comp, pn_cond, false, l_on_fail); // condition
+    compile_node(comp, pns->nodes[0]); // success value
+    EMIT(jump, l_done);
+
+    EMIT(label_assign, l_on_fail);
+    EMIT(set_stack_size, entry_stack_size); // force stack size reset
+    compile_node(comp, pn_on_fail); // failure value
+
+    EMIT(label_assign, l_done);
+}
+
+// Compile a lambda expression. On the first pass a SCOPE_LAMBDA scope is
+// created and stashed in pns->nodes[2]; every pass then reads it back from
+// there and passes it to close_over_variables_etc.
+// (pns->nodes[0] and pns->nodes[1] would be the params and body; unused here.)
+void compile_lambdef(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // TODO default params etc for lambda; possibly just use funcdef code
+
+    if (comp->pass == PASS_1) {
+        // first pass only: create the scope and keep it in the parse node so
+        // this function can find it again on later passes
+        scope_t *new_scope = scope_new_and_link(comp, SCOPE_LAMBDA, (py_parse_node_t)pns);
+        pns->nodes[2] = (py_parse_node_t)new_scope;
+    }
+
+    // make the lambda from its stored scope
+    scope_t *lambda_scope = (scope_t*)pns->nodes[2];
+    close_over_variables_etc(comp, lambda_scope, 0, 0);
+}
+
+// Compile an "or" chain: between consecutive operands a
+// jump_if_true_or_pop to the end label is emitted, so evaluation stops at the
+// first operand that takes the jump.
+void compile_or_test(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int l_done = EMIT(label_new);
+    const int num_operands = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    for (int k = 0; k < num_operands; k++) {
+        if (k > 0) {
+            // decide on the previous operand before evaluating this one
+            EMIT(jump_if_true_or_pop, l_done);
+        }
+        compile_node(comp, pns->nodes[k]);
+    }
+    EMIT(label_assign, l_done);
+}
+
+// Compile an "and" chain: between consecutive operands a
+// jump_if_false_or_pop to the end label is emitted, so evaluation stops at
+// the first operand that takes the jump.
+void compile_and_test(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int l_done = EMIT(label_new);
+    const int num_operands = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    for (int k = 0; k < num_operands; k++) {
+        if (k > 0) {
+            // decide on the previous operand before evaluating this one
+            EMIT(jump_if_false_or_pop, l_done);
+        }
+        compile_node(comp, pns->nodes[k]);
+    }
+    EMIT(label_assign, l_done);
+}
+
+// Compile a "not" expression: evaluate the operand in pns->nodes[0], then
+// apply RT_UNARY_OP_NOT to it.
+void compile_not_test_2(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_operand = pns->nodes[0];
+    compile_node(comp, pn_operand);
+    EMIT(unary_op, RT_UNARY_OP_NOT);
+}
+
+// Compile a comparison chain. Operands sit at even indices of pns->nodes and
+// the comparison operators (tokens, or PN_comp_op_not_in / PN_comp_op_is
+// struct nodes) at odd indices. With more than one operator, a failed
+// comparison jumps to a shared label that drops the leftover operand.
+void compile_comparison(compiler_t *comp, py_parse_node_struct_t *pns) {
+    int entry_stack_size = EMIT(get_stack_size);
+    int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]);
+
+    // the failure label is only needed when comparisons are chained
+    bool chained = (num_nodes > 3);
+    int l_fail = 0;
+    if (chained) {
+        l_fail = EMIT(label_new);
+    }
+
+    for (int i = 1; i + 1 < num_nodes; i += 2) {
+        bool more_follow = (i + 2 < num_nodes);
+
+        compile_node(comp, pns->nodes[i + 1]);
+        if (more_follow) {
+            // keep a copy of this operand below the pair being compared, for
+            // use as the left operand of the next comparison
+            EMIT(dup_top);
+            EMIT(rot_three);
+        }
+
+        py_parse_node_t pn_op = pns->nodes[i];
+        if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_LESS)) {
+            EMIT(compare_op, RT_COMPARE_OP_LESS);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_MORE)) {
+            EMIT(compare_op, RT_COMPARE_OP_MORE);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_DBL_EQUAL)) {
+            EMIT(compare_op, RT_COMPARE_OP_EQUAL);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_LESS_EQUAL)) {
+            EMIT(compare_op, RT_COMPARE_OP_LESS_EQUAL);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_MORE_EQUAL)) {
+            EMIT(compare_op, RT_COMPARE_OP_MORE_EQUAL);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_NOT_EQUAL)) {
+            EMIT(compare_op, RT_COMPARE_OP_NOT_EQUAL);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_KW_IN)) {
+            EMIT(compare_op, RT_COMPARE_OP_IN);
+        } else if (PY_PARSE_NODE_IS_STRUCT(pn_op)) {
+            // multi-token operators are struct nodes
+            py_parse_node_struct_t *pns_op = (py_parse_node_struct_t*)pn_op;
+            int op_kind = PY_PARSE_NODE_STRUCT_KIND(pns_op);
+            if (op_kind == PN_comp_op_not_in) {
+                EMIT(compare_op, RT_COMPARE_OP_NOT_IN);
+            } else if (op_kind == PN_comp_op_is) {
+                // a null child selects IS, anything else selects IS_NOT
+                if (PY_PARSE_NODE_IS_NULL(pns_op->nodes[0])) {
+                    EMIT(compare_op, RT_COMPARE_OP_IS);
+                } else {
+                    EMIT(compare_op, RT_COMPARE_OP_IS_NOT);
+                }
+            } else {
+                // shouldn't happen
+                assert(0);
+            }
+        } else {
+            // shouldn't happen
+            assert(0);
+        }
+
+        if (more_follow) {
+            EMIT(jump_if_false_or_pop, l_fail);
+        }
+    }
+
+    if (chained) {
+        int l_done = EMIT(label_new);
+        EMIT(jump, l_done);
+        // failure path: remove the saved operand from under the result
+        EMIT(label_assign, l_fail);
+        EMIT(rot_two);
+        EMIT(pop_top);
+        EMIT(label_assign, l_done);
+        EMIT(set_stack_size, entry_stack_size + 1); // force stack size
+    }
+}
+
+// Star expressions are not implemented yet: this asserts unconditionally.
+// With assertions disabled only the operand in pns->nodes[0] is compiled.
+void compile_star_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // TODO
+    assert(0);
+    py_parse_node_t pn_operand = pns->nodes[0];
+    compile_node(comp, pn_operand);
+    //EMIT(unary_op, "UNARY_STAR");
+}
+
+// Compile an "expr" node: its operands are combined left to right with
+// RT_BINARY_OP_OR.
+void compile_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const rt_binary_op_t op = RT_BINARY_OP_OR;
+    c_binary_op(comp, pns, op);
+}
+
+// Compile an "xor_expr" node: its operands are combined left to right with
+// RT_BINARY_OP_XOR.
+void compile_xor_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const rt_binary_op_t op = RT_BINARY_OP_XOR;
+    c_binary_op(comp, pns, op);
+}
+
+// Compile an "and_expr" node: its operands are combined left to right with
+// RT_BINARY_OP_AND.
+void compile_and_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const rt_binary_op_t op = RT_BINARY_OP_AND;
+    c_binary_op(comp, pns, op);
+}
+
+// Compile a chain of shift operations. Operands sit at even indices of
+// pns->nodes; the PY_TOKEN_OP_DBL_LESS / PY_TOKEN_OP_DBL_MORE operator tokens
+// sit at the odd indices between them.
+void compile_shift_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]); // left-most operand
+    for (int op = 1; op + 1 < n; op += 2) {
+        compile_node(comp, pns->nodes[op + 1]); // right-hand operand
+        py_parse_node_t pn_op = pns->nodes[op];
+        if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_DBL_LESS)) {
+            EMIT(binary_op, RT_BINARY_OP_LSHIFT);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_DBL_MORE)) {
+            EMIT(binary_op, RT_BINARY_OP_RSHIFT);
+        } else {
+            // shouldn't happen
+            assert(0);
+        }
+    }
+}
+
+// Compile a chain of additions and subtractions. Operands sit at even
+// indices of pns->nodes; the PY_TOKEN_OP_PLUS / PY_TOKEN_OP_MINUS operator
+// tokens sit at the odd indices between them.
+void compile_arith_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]); // left-most operand
+    for (int op = 1; op + 1 < n; op += 2) {
+        compile_node(comp, pns->nodes[op + 1]); // right-hand operand
+        py_parse_node_t pn_op = pns->nodes[op];
+        if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_PLUS)) {
+            EMIT(binary_op, RT_BINARY_OP_ADD);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_MINUS)) {
+            EMIT(binary_op, RT_BINARY_OP_SUBTRACT);
+        } else {
+            // shouldn't happen
+            assert(0);
+        }
+    }
+}
+
+// Compile a chain of multiply / floor-divide / true-divide / modulo
+// operations. Operands sit at even indices of pns->nodes; the operator tokens
+// sit at the odd indices between them.
+void compile_term(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    compile_node(comp, pns->nodes[0]); // left-most operand
+    for (int op = 1; op + 1 < n; op += 2) {
+        compile_node(comp, pns->nodes[op + 1]); // right-hand operand
+        py_parse_node_t pn_op = pns->nodes[op];
+        if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_STAR)) {
+            EMIT(binary_op, RT_BINARY_OP_MULTIPLY);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_DBL_SLASH)) {
+            EMIT(binary_op, RT_BINARY_OP_FLOOR_DIVIDE);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_SLASH)) {
+            EMIT(binary_op, RT_BINARY_OP_TRUE_DIVIDE);
+        } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_PERCENT)) {
+            EMIT(binary_op, RT_BINARY_OP_MODULO);
+        } else {
+            // shouldn't happen
+            assert(0);
+        }
+    }
+}
+
+// Compile a unary plus / minus / invert expression: pns->nodes[0] is the
+// operator token and pns->nodes[1] the operand, which is compiled first.
+void compile_factor_2(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_op = pns->nodes[0];
+    compile_node(comp, pns->nodes[1]); // operand
+    if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_PLUS)) {
+        EMIT(unary_op, RT_UNARY_OP_POSITIVE);
+    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_MINUS)) {
+        EMIT(unary_op, RT_UNARY_OP_NEGATIVE);
+    } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pn_op, PY_TOKEN_OP_TILDE)) {
+        EMIT(unary_op, RT_UNARY_OP_INVERT);
+    } else {
+        // shouldn't happen
+        assert(0);
+    }
+}
+
+// Compile the argument list of a call and emit the call itself. The function
+// to call is expected to be on top of the stack already.
+//   pns:            the call trailer; pns->nodes[0] holds the arguments (can be null)
+//   is_method_call: selects call_method instead of call_function
+// The per-call argument counters in comp are saved, reset for this call and
+// restored afterwards, so calls nested inside arguments do not disturb them.
+void compile_trailer_paren_helper(compiler_t *comp, py_parse_node_struct_t *pns, bool is_method_call) {
+    // save the enclosing call's argument bookkeeping
+    const int saved_n_arg_keyword = comp->n_arg_keyword;
+    const bool saved_have_star_arg = comp->have_star_arg;
+    const bool saved_have_dbl_star_arg = comp->have_dbl_star_arg;
+
+    // start this call with a clean slate
+    comp->n_arg_keyword = 0;
+    comp->have_star_arg = false;
+    comp->have_dbl_star_arg = false;
+
+    // arguments to function call; can be null
+    // (presumably this is what updates the counters read below)
+    compile_node(comp, pns->nodes[0]);
+
+    // positional count = all arguments minus keyword, star and double-star ones
+    int n_positional = list_len(pns->nodes[0], PN_arglist) - comp->n_arg_keyword;
+    n_positional -= comp->have_star_arg ? 1 : 0;
+    n_positional -= comp->have_dbl_star_arg ? 1 : 0;
+
+    if (is_method_call) {
+        EMIT(call_method, n_positional, comp->n_arg_keyword, comp->have_star_arg, comp->have_dbl_star_arg);
+    } else {
+        EMIT(call_function, n_positional, comp->n_arg_keyword, comp->have_star_arg, comp->have_dbl_star_arg);
+    }
+
+    // hand the bookkeeping back to the enclosing call
+    comp->n_arg_keyword = saved_n_arg_keyword;
+    comp->have_star_arg = saved_have_star_arg;
+    comp->have_dbl_star_arg = saved_have_dbl_star_arg;
+}
+
+// Compile the trailers in pns->nodes one after another. A PN_trailer_period
+// immediately followed by a PN_trailer_paren is compiled as one method call
+// (load_method + call_method); every other trailer is compiled normally.
+void compile_power_trailers(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const int num_trailers = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+    int i = 0;
+    while (i < num_trailers) {
+        bool is_method_call = i + 1 < num_trailers
+            && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[i], PN_trailer_period)
+            && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[i + 1], PN_trailer_paren);
+        if (!is_method_call) {
+            compile_node(comp, pns->nodes[i]);
+            i += 1;
+            continue;
+        }
+        // optimisation for method calls a.f(...), following PyPy
+        py_parse_node_struct_t *pns_attr = (py_parse_node_struct_t*)pns->nodes[i];
+        py_parse_node_struct_t *pns_call = (py_parse_node_struct_t*)pns->nodes[i + 1];
+        EMIT(load_method, PY_PARSE_NODE_LEAF_ARG(pns_attr->nodes[0])); // get the method
+        compile_trailer_paren_helper(comp, pns_call, true);
+        i += 2; // both trailers have been consumed
+    }
+}
+
+// Compile the exponent part of a power expression: evaluate the operand in
+// pns->nodes[0], then emit RT_BINARY_OP_POWER.
+// (The base is presumably already on the stack -- TODO confirm against caller.)
+void compile_power_dbl_star(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_exponent = pns->nodes[0];
+    compile_node(comp, pn_exponent);
+    EMIT(binary_op, RT_BINARY_OP_POWER);
+}
+
+// Compile a list of adjacent string leaves as one verbatim constant: the text
+// of every leaf in pns->nodes is written back to back between a single pair
+// of quote characters.
+void compile_atom_string(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const int num_parts = PY_PARSE_NODE_STRUCT_NUM_NODES(pns);
+
+    EMIT(load_const_verbatim_start);
+    EMIT(load_const_verbatim_str, "'"); // opening quote
+    for (int k = 0; k < num_parts; k++) {
+        // TODO allow concatenation of either strings or bytes, but not mixed
+        py_parse_node_t pn_part = pns->nodes[k];
+        assert(PY_PARSE_NODE_IS_LEAF(pn_part));
+        assert(PY_PARSE_NODE_LEAF_KIND(pn_part) == PY_PARSE_NODE_STRING);
+        const char *text = qstr_str(PY_PARSE_NODE_LEAF_ARG(pn_part));
+        EMIT(load_const_verbatim_strn, text, strlen(text));
+    }
+    EMIT(load_const_verbatim_str, "'"); // closing quote
+    EMIT(load_const_verbatim_end);
+}
+
+// Compile a comprehension of the given scope kind.
+// pns must have exactly 2 nodes: the first is the lhs of the comprehension,
+// the second is its PN_comp_for node. The scope created on the first pass is
+// kept in nodes[3] of that PN_comp_for node and read back on every pass.
+void compile_comprehension(compiler_t *comp, py_parse_node_struct_t *pns, scope_kind_t kind) {
+    assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 2);
+    assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for));
+    py_parse_node_struct_t *pns_for = (py_parse_node_struct_t*)pns->nodes[1];
+
+    if (comp->pass == PASS_1) {
+        // first pass only: create the scope and keep it in the parse node so
+        // this function can find it again on later passes
+        scope_t *new_scope = scope_new_and_link(comp, kind, (py_parse_node_t)pns);
+        pns_for->nodes[3] = (py_parse_node_t)new_scope;
+    }
+
+    // compile the comprehension from its stored scope
+    scope_t *comp_scope = (scope_t*)pns_for->nodes[3];
+    close_over_variables_etc(comp, comp_scope, 0, 0);
+
+    // call it with one argument: an iterator over the source expression
+    compile_node(comp, pns_for->nodes[1]); // source of the iterator
+    EMIT(get_iter);
+    EMIT(call_function, 1, 0, false, false);
+}
+
+// Compile a parenthesised atom: an empty tuple, a tuple display, a generator
+// expression, or simply a parenthesised single item.
+void compile_atom_paren(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_inner = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_NULL(pn_inner)) {
+        // an empty tuple
+        c_tuple(comp, PY_PARSE_NODE_NULL, NULL);
+        return;
+    }
+
+    if (!PY_PARSE_NODE_IS_STRUCT_KIND(pn_inner, PN_testlist_comp)) {
+        // parenthesis around a single item, is just that item
+        compile_node(comp, pn_inner);
+        return;
+    }
+
+    py_parse_node_struct_t *pns_items = (py_parse_node_struct_t*)pn_inner;
+    assert(!PY_PARSE_NODE_IS_NULL(pns_items->nodes[1]));
+
+    if (PY_PARSE_NODE_IS_STRUCT(pns_items->nodes[1])) {
+        py_parse_node_struct_t *pns_tail = (py_parse_node_struct_t*)pns_items->nodes[1];
+        if (PY_PARSE_NODE_STRUCT_KIND(pns_tail) == PN_testlist_comp_3b) {
+            // tuple of one item, with trailing comma
+            assert(PY_PARSE_NODE_IS_NULL(pns_tail->nodes[0]));
+            c_tuple(comp, pns_items->nodes[0], NULL);
+            return;
+        }
+        if (PY_PARSE_NODE_STRUCT_KIND(pns_tail) == PN_testlist_comp_3c) {
+            // tuple of many items: first item plus the list held by the tail
+            c_tuple(comp, pns_items->nodes[0], pns_tail);
+            return;
+        }
+        if (PY_PARSE_NODE_STRUCT_KIND(pns_tail) == PN_comp_for) {
+            // generator expression
+            compile_comprehension(comp, pns_items, SCOPE_GEN_EXPR);
+            return;
+        }
+        // any other struct is the second item of a 2-tuple; handled below
+    }
+
+    // tuple with 2 items: both are the nodes of the testlist_comp itself
+    c_tuple(comp, PY_PARSE_NODE_NULL, pns_items);
+}
+
+// Compile a bracket atom: a list display (of any length) or a list
+// comprehension.
+void compile_atom_bracket(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_inner = pns->nodes[0];
+
+    if (PY_PARSE_NODE_IS_NULL(pn_inner)) {
+        // empty list
+        EMIT(build_list, 0);
+        return;
+    }
+
+    if (!PY_PARSE_NODE_IS_STRUCT_KIND(pn_inner, PN_testlist_comp)) {
+        // list with 1 item
+        compile_node(comp, pn_inner);
+        EMIT(build_list, 1);
+        return;
+    }
+
+    py_parse_node_struct_t *pns_items = (py_parse_node_struct_t*)pn_inner;
+    if (PY_PARSE_NODE_IS_STRUCT(pns_items->nodes[1])) {
+        py_parse_node_struct_t *pns_tail = (py_parse_node_struct_t*)pns_items->nodes[1];
+        if (PY_PARSE_NODE_STRUCT_KIND(pns_tail) == PN_testlist_comp_3b) {
+            // list of one item, with trailing comma
+            assert(PY_PARSE_NODE_IS_NULL(pns_tail->nodes[0]));
+            compile_node(comp, pns_items->nodes[0]);
+            EMIT(build_list, 1);
+            return;
+        }
+        if (PY_PARSE_NODE_STRUCT_KIND(pns_tail) == PN_testlist_comp_3c) {
+            // list of many items
+            compile_node(comp, pns_items->nodes[0]);
+            compile_generic_all_nodes(comp, pns_tail);
+            EMIT(build_list, 1 + PY_PARSE_NODE_STRUCT_NUM_NODES(pns_tail));
+            return;
+        }
+        if (PY_PARSE_NODE_STRUCT_KIND(pns_tail) == PN_comp_for) {
+            // list comprehension
+            compile_comprehension(comp, pns_items, SCOPE_LIST_COMP);
+            return;
+        }
+        // any other struct is the second item of a 2-element list; handled below
+    }
+
+    // list with 2 items
+    compile_node(comp, pns_items->nodes[0]);
+    compile_node(comp, pns_items->nodes[1]);
+    EMIT(build_list, 2);
+}
+
+// Compile a brace atom, i.e. a dict or set display or comprehension:
+//   - no contents                      -> empty dict
+//   - a single key:value item          -> dict with one element
+//   - a single plain value             -> set with one element
+//   - several comma separated elements -> dict or set; the first element decides
+//   - an element followed by comp_for  -> dict or set comprehension
+// Mixing key:value items and plain values in one display prints a SyntaxError
+// message and abandons compilation of this atom.
+void compile_atom_brace(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn = pns->nodes[0];
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        // empty dict
+        EMIT(build_map, 0);
+    } else if (PY_PARSE_NODE_IS_STRUCT(pn)) {
+        pns = (py_parse_node_struct_t*)pn;
+        if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dictorsetmaker_item) {
+            // dict with one element
+            EMIT(build_map, 1);
+            compile_node(comp, pn);
+            EMIT(store_map);
+        } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dictorsetmaker) {
+            assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should succeed
+            py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1];
+            if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_dictorsetmaker_list) {
+                // dict/set with multiple elements
+
+                // get tail elements (2nd, 3rd, ...)
+                py_parse_node_t *nodes;
+                int n = list_get(&pns1->nodes[0], PN_dictorsetmaker_list2, &nodes);
+
+                // first element sets whether it's a dict or set
+                bool is_dict = PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_dictorsetmaker_item);
+                if (is_dict) {
+                    // a dictionary: the map is built first, then filled item by item
+                    EMIT(build_map, 1 + n);
+                    compile_node(comp, pns->nodes[0]);
+                    EMIT(store_map);
+                } else {
+                    // a set: values are compiled now and the set is built at the end
+                    compile_node(comp, pns->nodes[0]); // 1st value of set
+                }
+
+                // process rest of elements
+                for (int i = 0; i < n; i++) {
+                    // named pn_elem so it does not shadow the outer pn
+                    py_parse_node_t pn_elem = nodes[i];
+                    bool is_key_value = PY_PARSE_NODE_IS_STRUCT_KIND(pn_elem, PN_dictorsetmaker_item);
+                    compile_node(comp, pn_elem);
+                    if (is_dict) {
+                        if (!is_key_value) {
+                            // newline-terminated like every other diagnostic in this file
+                            printf("SyntaxError?: expecting key:value for dictionary\n");
+                            return;
+                        }
+                        EMIT(store_map);
+                    } else {
+                        if (is_key_value) {
+                            // newline-terminated like every other diagnostic in this file
+                            printf("SyntaxError?: expecting just a value for set\n");
+                            return;
+                        }
+                    }
+                }
+
+                // if it's a set, build it
+                if (!is_dict) {
+                    EMIT(build_set, 1 + n);
+                }
+            } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_comp_for) {
+                // dict/set comprehension
+                if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_dictorsetmaker_item)) {
+                    // a dictionary comprehension
+                    compile_comprehension(comp, pns, SCOPE_DICT_COMP);
+                } else {
+                    // a set comprehension
+                    compile_comprehension(comp, pns, SCOPE_SET_COMP);
+                }
+            } else {
+                // shouldn't happen
+                assert(0);
+            }
+        } else {
+            // set with one element
+            goto set_with_one_element;
+        }
+    } else {
+        // set with one element
+        set_with_one_element:
+        compile_node(comp, pn);
+        EMIT(build_set, 1);
+    }
+}
+
+// Compile a "(...)" call trailer by delegating to the shared helper with its
+// flag argument set to false (the load_method path elsewhere in this file
+// passes true instead).
+void compile_trailer_paren(compiler_t *comp, py_parse_node_struct_t *pns) {
+    const bool helper_flag = false;
+    compile_trailer_paren_helper(comp, pns, helper_flag);
+}
+
+// Compile a "[index]" trailer: compile the index expression and emit the
+// subscript operation.
+void compile_trailer_bracket(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // the object whose index we want is on top of the stack
+    py_parse_node_t pn_index = pns->nodes[0];
+    compile_node(comp, pn_index);
+    EMIT(binary_op, RT_BINARY_OP_SUBSCR);
+}
+
+// Compile a ".name" trailer: emit an attribute load for the name in node 0.
+void compile_trailer_period(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // the object whose attribute we want is on top of the stack
+    qstr attr_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+    EMIT(load_attr, attr_name);
+}
+
+// Compile everything after the first ':' of a slice and emit build_slice.
+// The slice start has already been emitted by the caller (written '?' in the
+// comments below).  build_slice gets 2 arguments unless an explicit step
+// expression is present, in which case it gets 3.
+void compile_subscript_3_helper(compiler_t *comp, py_parse_node_struct_t *pns) {
+    assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_subscript_3); // should always be
+    py_parse_node_t pn_rest = pns->nodes[0];
+    int slice_argc = 2;
+
+    if (PY_PARSE_NODE_IS_NULL(pn_rest)) {
+        // [?:]
+        EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+    } else if (!PY_PARSE_NODE_IS_STRUCT(pn_rest)) {
+        // [?:x]
+        compile_node(comp, pn_rest);
+    } else {
+        py_parse_node_struct_t *pns_rest = (py_parse_node_struct_t*)pn_rest;
+        if (PY_PARSE_NODE_STRUCT_KIND(pns_rest) == PN_subscript_3c) {
+            // [?::] or [?::x] -- the stop is None
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            py_parse_node_t pn_step = pns_rest->nodes[0];
+            if (!PY_PARSE_NODE_IS_NULL(pn_step)) {
+                // [?::x]
+                compile_node(comp, pn_step);
+                slice_argc = 3;
+            }
+        } else if (PY_PARSE_NODE_STRUCT_KIND(pns_rest) == PN_subscript_3d) {
+            // [?:x:] or [?:x:x]
+            compile_node(comp, pns_rest->nodes[0]);
+            assert(PY_PARSE_NODE_IS_STRUCT(pns_rest->nodes[1])); // should always be
+            py_parse_node_struct_t *pns_sliceop = (py_parse_node_struct_t*)pns_rest->nodes[1];
+            assert(PY_PARSE_NODE_STRUCT_KIND(pns_sliceop) == PN_sliceop); // should always be
+            if (!PY_PARSE_NODE_IS_NULL(pns_sliceop->nodes[0])) {
+                // [?:x:x]
+                compile_node(comp, pns_sliceop->nodes[0]);
+                slice_argc = 3;
+            }
+        } else {
+            // [?:x]
+            compile_node(comp, pn_rest);
+        }
+    }
+
+    EMIT(build_slice, slice_argc);
+}
+
+// Compile a slice that has an explicit start: emit the start expression, then
+// let the helper handle the remainder held in node 1.
+void compile_subscript_2(compiler_t *comp, py_parse_node_struct_t *pns) {
+    py_parse_node_t pn_start = pns->nodes[0];
+    compile_node(comp, pn_start); // start of slice
+
+    py_parse_node_t pn_rest = pns->nodes[1];
+    assert(PY_PARSE_NODE_IS_STRUCT(pn_rest)); // should always be
+    compile_subscript_3_helper(comp, (py_parse_node_struct_t*)pn_rest);
+}
+
+// Compile a slice that has no explicit start: None is emitted in place of the
+// start, then the helper handles the rest of the slice.
+void compile_subscript_3(compiler_t *comp, py_parse_node_struct_t *pns) {
+    EMIT(load_const_tok, PY_TOKEN_KW_NONE); // stands in for the missing start
+    compile_subscript_3_helper(comp, pns);
+}
+
+// Compile one key:value pair of a dict.  Note the emission order: the value
+// (node 1) is compiled before the key (node 0).
+void compile_dictorsetmaker_item(compiler_t *comp, py_parse_node_struct_t *pns) {
+    // if this is called then we are compiling a dict key:value pair
+    py_parse_node_t pn_value = pns->nodes[1];
+    compile_node(comp, pn_value);
+    py_parse_node_t pn_key = pns->nodes[0];
+    compile_node(comp, pn_key);
+}
+
+// Compile a class definition statement: the helper compiles the class and
+// returns its name, and the resulting class object is stored under that name.
+void compile_classdef(compiler_t *comp, py_parse_node_struct_t *pns) {
+    qstr class_name = compile_classdef_helper(comp, pns);
+
+    // store class object into class name
+    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, class_name);
+}
+
+// Compile a "*x" call argument.  Only the first one in an argument list is
+// accepted; a second one prints an error and is not compiled.
+void compile_arglist_star(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (!comp->have_star_arg) {
+        comp->have_star_arg = true;
+        compile_node(comp, pns->nodes[0]);
+    } else {
+        printf("SyntaxError?: can't have multiple *x\n");
+    }
+}
+
+// Compile a "**x" call argument.  Only the first one in an argument list is
+// accepted; a second one prints an error and is not compiled.
+void compile_arglist_dbl_star(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (!comp->have_dbl_star_arg) {
+        comp->have_dbl_star_arg = true;
+        compile_node(comp, pns->nodes[0]);
+    } else {
+        printf("SyntaxError?: can't have multiple **x\n");
+    }
+}
+
+// Compile a call argument whose second node is a struct: either a keyword
+// argument "id=value" (PN_argument_3) or a bare generator expression
+// (PN_comp_for).
+void compile_argument(compiler_t *comp, py_parse_node_struct_t *pns) {
+    assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should always be
+    py_parse_node_struct_t *pns_rhs = (py_parse_node_struct_t*)pns->nodes[1];
+
+    if (PY_PARSE_NODE_STRUCT_KIND(pns_rhs) == PN_comp_for) {
+        // generator expression passed as the argument
+        compile_comprehension(comp, pns, SCOPE_GEN_EXPR);
+        return;
+    }
+
+    if (PY_PARSE_NODE_STRUCT_KIND(pns_rhs) != PN_argument_3) {
+        // shouldn't happen
+        assert(0);
+        return;
+    }
+
+    // keyword argument: emit the name as a constant followed by the value
+    py_parse_node_t pn_keyword = pns->nodes[0];
+    if (!PY_PARSE_NODE_IS_ID(pn_keyword)) {
+        printf("SyntaxError?: lhs of keyword argument must be an id\n");
+        return;
+    }
+    EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(pn_keyword));
+    compile_node(comp, pns_rhs->nodes[0]);
+    comp->n_arg_keyword += 1;
+}
+
+// Compile a yield expression.  It is only accepted when the current scope is
+// a function scope; otherwise an error is printed and nothing is emitted.
+//   yield          -> None is yielded
+//   yield from x   -> x is compiled, its iterator taken, then yield_from
+//   yield x        -> x is compiled and yielded
+void compile_yield_expr(compiler_t *comp, py_parse_node_struct_t *pns) {
+    if (comp->scope_cur->kind != SCOPE_FUNCTION) {
+        printf("SyntaxError: 'yield' outside function\n");
+        return;
+    }
+
+    py_parse_node_t pn_arg = pns->nodes[0];
+    if (PY_PARSE_NODE_IS_NULL(pn_arg)) {
+        EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+        EMIT(yield_value);
+        return;
+    }
+
+    if (PY_PARSE_NODE_IS_STRUCT_KIND(pn_arg, PN_yield_arg_from)) {
+        py_parse_node_struct_t *pns_from = (py_parse_node_struct_t*)pn_arg;
+        compile_node(comp, pns_from->nodes[0]);
+        EMIT(get_iter);
+        EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+        EMIT(yield_from);
+        return;
+    }
+
+    compile_node(comp, pn_arg);
+    EMIT(yield_value);
+}
+
+// Signature shared by all per-rule compile functions.
+typedef void (*compile_function_t)(compiler_t*, py_parse_node_struct_t*);
+// Dispatch table used by compile_node, indexed by parse-node struct kind.
+// Entry 0 is NULL; the remaining entries are generated by including grammar.h
+// with DEF_RULE defined to expand to its second argument, where "nc" yields
+// NULL and "c(f)" yields compile_f.  The helper macros are undefined again
+// right after the include.
+static compile_function_t compile_function[] = {
+    NULL,
+#define nc NULL
+#define c(f) compile_##f
+#define DEF_RULE(rule, comp, kind, arg...) comp,
+#include "grammar.h"
+#undef nc
+#undef c
+#undef DEF_RULE
+};
+
+// Compile a single parse node.  A null node compiles to nothing, a leaf node
+// emits the matching load, and a struct node is dispatched through the
+// compile_function table according to its kind.
+void compile_node(compiler_t *comp, py_parse_node_t pn) {
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        // pass
+        return;
+    }
+
+    if (!PY_PARSE_NODE_IS_LEAF(pn)) {
+        // struct node: look up the function for its rule
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+        compile_function_t handler = compile_function[PY_PARSE_NODE_STRUCT_KIND(pns)];
+        if (handler != NULL) {
+            handler(comp, pns);
+            return;
+        }
+        // no function registered for this rule
+        printf("node %u cannot be compiled\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns));
+        parse_node_show(pn, 0);
+        assert(0);
+        return;
+    }
+
+    // leaf node: identifiers are loaded, everything else is a constant
+    int leaf_arg = PY_PARSE_NODE_LEAF_ARG(pn);
+    switch (PY_PARSE_NODE_LEAF_KIND(pn)) {
+        case PY_PARSE_NODE_ID:
+            emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, leaf_arg);
+            break;
+        case PY_PARSE_NODE_SMALL_INT:
+            EMIT(load_const_small_int, leaf_arg);
+            break;
+        case PY_PARSE_NODE_INTEGER:
+            EMIT(load_const_int, leaf_arg);
+            break;
+        case PY_PARSE_NODE_DECIMAL:
+            EMIT(load_const_dec, leaf_arg);
+            break;
+        case PY_PARSE_NODE_STRING:
+            EMIT(load_const_str, leaf_arg, false);
+            break;
+        case PY_PARSE_NODE_BYTES:
+            EMIT(load_const_str, leaf_arg, true);
+            break;
+        case PY_PARSE_NODE_TOKEN:
+            EMIT(load_const_tok, leaf_arg);
+            break;
+        default:
+            assert(0);
+            break;
+    }
+}
+
+// Process one parameter node of a function or lambda and register the
+// parameter name in the current scope.
+//   pn                 the parameter's parse node (must be a struct)
+//   pn_name            node kind of a plain named parameter
+//   pn_star            node kind of a "*" parameter (bare, named, or annotated)
+//   pn_dbl_star        node kind of a "**" parameter
+//   allow_annotations  whether annotation slots are looked at
+// Updates num_params and the VARARGS/VARKEYWORDS flags of the current scope,
+// and records a bare star in comp->have_bare_star.  A name that is already
+// present in the scope prints an error and is left untouched.
+void compile_scope_func_lambda_param(compiler_t *comp, py_parse_node_t pn, pn_kind_t pn_name, pn_kind_t pn_star, pn_kind_t pn_dbl_star, bool allow_annotations) {
+    // TODO verify that *k and **k are last etc
+    assert(PY_PARSE_NODE_IS_STRUCT(pn));
+    py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn;
+    qstr param_name = 0;
+    py_parse_node_t pn_annotation = PY_PARSE_NODE_NULL;
+
+    if (PY_PARSE_NODE_STRUCT_KIND(pns) == pn_name) {
+        // plain named parameter
+        param_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+        if (allow_annotations && !PY_PARSE_NODE_IS_NULL(pns->nodes[1])) {
+            // this parameter has an annotation
+            pn_annotation = pns->nodes[1];
+        }
+        // num dict/default params are calculated in compile_funcdef_param;
+        // a name that comes after a bare star doesn't count as a parameter
+        if (!comp->have_bare_star) {
+            comp->scope_cur->num_params += 1;
+        }
+    } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == pn_star) {
+        py_parse_node_t pn_target = pns->nodes[0];
+        if (PY_PARSE_NODE_IS_NULL(pn_target)) {
+            // bare star
+            // TODO see http://www.python.org/dev/peps/pep-3102/
+            comp->have_bare_star = true;
+        } else if (PY_PARSE_NODE_IS_ID(pn_target)) {
+            // named star
+            comp->scope_cur->flags |= SCOPE_FLAG_VARARGS;
+            param_name = PY_PARSE_NODE_LEAF_ARG(pn_target);
+        } else if (allow_annotations && PY_PARSE_NODE_IS_STRUCT_KIND(pn_target, PN_tfpdef)) {
+            // named star with annotation
+            py_parse_node_struct_t *pns_tfpdef = (py_parse_node_struct_t*)pn_target;
+            comp->scope_cur->flags |= SCOPE_FLAG_VARARGS;
+            param_name = PY_PARSE_NODE_LEAF_ARG(pns_tfpdef->nodes[0]);
+            pn_annotation = pns_tfpdef->nodes[1];
+        } else {
+            // shouldn't happen
+            assert(0);
+        }
+    } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == pn_dbl_star) {
+        // double star
+        param_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
+        if (allow_annotations && !PY_PARSE_NODE_IS_NULL(pns->nodes[1])) {
+            // this parameter has an annotation
+            pn_annotation = pns->nodes[1];
+        }
+        comp->scope_cur->flags |= SCOPE_FLAG_VARKEYWORDS;
+    } else {
+        // TODO anything to implement?
+        assert(0);
+    }
+
+    if (param_name == 0) {
+        // nothing to register (e.g. a bare star)
+        return;
+    }
+
+    if (!PY_PARSE_NODE_IS_NULL(pn_annotation)) {
+        // TODO this parameter has an annotation
+    }
+
+    // add the name to the scope as a local that is a parameter
+    bool added;
+    id_info_t *info = scope_find_or_add_id(comp->scope_cur, param_name, &added);
+    if (!added) {
+        printf("SyntaxError?: same name used for parameter; %s\n", qstr_str(param_name));
+        return;
+    }
+    info->param = true;
+    info->kind = ID_INFO_KIND_LOCAL;
+}
+
+// Register one parameter of a "def" function: uses the typedargslist node
+// kinds and allows annotations.
+void compile_scope_func_param(compiler_t *comp, py_parse_node_t pn) {
+    const bool allow_annotations = true;
+    compile_scope_func_lambda_param(comp, pn, PN_typedargslist_name, PN_typedargslist_star, PN_typedargslist_dbl_star, allow_annotations);
+}
+
+// Register one parameter of a lambda: uses the varargslist node kinds and
+// does not allow annotations.
+void compile_scope_lambda_param(compiler_t *comp, py_parse_node_t pn) {
+    const bool allow_annotations = false;
+    compile_scope_func_lambda_param(comp, pn, PN_varargslist_name, PN_varargslist_star, PN_varargslist_dbl_star, allow_annotations);
+}
+
+// Compile the chain of nested "if" and "for" clauses of a comprehension,
+// ending with the inner expression.
+//   pn_iter        the next clause (PN_comp_if, PN_comp_for, or null at the end)
+//   pn_inner_expr  the expression evaluated on each innermost iteration
+//   l_top          label passed to c_if_cond for each "if" clause
+//   for_depth      number of nested "for" clauses entered so far; passed
+//                  (plus 2) to the list_append/map_add/set_add emits
+void compile_scope_comp_iter(compiler_t *comp, py_parse_node_t pn_iter, py_parse_node_t pn_inner_expr, int l_top, int for_depth) {
+    // consecutive "if" clauses are handled by looping rather than recursing
+    for (;;) {
+        if (PY_PARSE_NODE_IS_NULL(pn_iter)) {
+            // no more nested if/for; compile inner expression
+            compile_node(comp, pn_inner_expr);
+            if (comp->scope_cur->kind == SCOPE_LIST_COMP) {
+                EMIT(list_append, for_depth + 2);
+            } else if (comp->scope_cur->kind == SCOPE_DICT_COMP) {
+                EMIT(map_add, for_depth + 2);
+            } else if (comp->scope_cur->kind == SCOPE_SET_COMP) {
+                EMIT(set_add, for_depth + 2);
+            } else {
+                // any other scope kind: yield the value and drop the result
+                EMIT(yield_value);
+                EMIT(pop_top);
+            }
+            return;
+        }
+
+        if (!PY_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_if)) {
+            break;
+        }
+
+        // if condition
+        py_parse_node_struct_t *pns_if = (py_parse_node_struct_t*)pn_iter;
+        c_if_cond(comp, pns_if->nodes[0], false, l_top);
+        pn_iter = pns_if->nodes[1];
+    }
+
+    if (!PY_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_for)) {
+        // shouldn't happen
+        assert(0);
+        return;
+    }
+
+    // for loop: nested clauses are compiled recursively one level deeper
+    py_parse_node_struct_t *pns_for = (py_parse_node_struct_t*)pn_iter;
+    compile_node(comp, pns_for->nodes[1]);
+    int label_end = EMIT(label_new);
+    int label_top = EMIT(label_new);
+    EMIT(get_iter);
+    EMIT(label_assign, label_top);
+    EMIT(for_iter, label_end);
+    c_assign(comp, pns_for->nodes[0], ASSIGN_STORE);
+    compile_scope_comp_iter(comp, pns_for->nodes[2], pn_inner_expr, label_top, for_depth + 1);
+    EMIT(jump, label_top);
+    EMIT(label_assign, label_end);
+    EMIT(for_iter_end);
+}
+
+// If the first statement reachable from pn is an expression statement made of
+// a single string leaf, compile that string and store it under __doc__.
+// pn may be the expression statement itself, or a PN_file_input_2 or
+// PN_suite_block_stmts node whose first child is inspected instead.
+// See http://www.python.org/dev/peps/pep-0257/
+void check_for_doc_string(compiler_t *comp, py_parse_node_t pn) {
+    // look for the first statement
+    if (!PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_expr_stmt)) {
+        if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_file_input_2) || PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_suite_block_stmts)) {
+            pn = ((py_parse_node_struct_t*)pn)->nodes[0];
+        } else {
+            return;
+        }
+    }
+
+    // check the first statement for a doc string
+    if (!PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_expr_stmt)) {
+        return;
+    }
+    py_parse_node_struct_t *pns_stmt = (py_parse_node_struct_t*)pn;
+    py_parse_node_t pn_first = pns_stmt->nodes[0];
+    if (!PY_PARSE_NODE_IS_LEAF(pn_first)) {
+        return;
+    }
+    if (PY_PARSE_NODE_LEAF_KIND(pn_first) != PY_PARSE_NODE_STRING) {
+        return;
+    }
+
+    compile_node(comp, pn_first); // a doc string
+    // store doc string
+    emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___doc__);
+}
+
+// Run one compiler pass over one scope.
+//   comp   compiler state; comp->pass and comp->scope_cur are set from the
+//          arguments for the duration of the pass
+//   scope  the scope to compile; scope->kind selects how scope->pn is treated
+//   pass   which pass to run
+// The emitter is started for the pass, the body is compiled according to the
+// kind of scope (module, function, lambda, comprehension/generator expression
+// or class), and the emitter pass is ended.
+void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) {
+    comp->pass = pass;
+    comp->scope_cur = scope;
+    emit_start_pass(comp->emit, pass, scope);
+
+    // the stack size is reset at the start of the first pass only
+    if (comp->pass == PASS_1) {
+        scope->stack_size = 0;
+    }
+
+    // on the final pass, print information about the scope
+    if (comp->pass == PASS_3) {
+        //printf("----\n");
+        scope_print_info(scope);
+    }
+
+    // compile
+    if (scope->kind == SCOPE_MODULE) {
+        // module: optional doc string, then the body, then return None
+        check_for_doc_string(comp, scope->pn);
+        compile_node(comp, scope->pn);
+        EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+        EMIT(return_value);
+    } else if (scope->kind == SCOPE_FUNCTION) {
+        assert(PY_PARSE_NODE_IS_STRUCT(scope->pn));
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn;
+        assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_funcdef);
+
+        // work out number of parameters, keywords and default parameters, and add them to the id_info array
+        if (comp->pass == PASS_1) {
+            comp->have_bare_star = false;
+            apply_to_single_or_list(comp, pns->nodes[1], PN_typedargslist, compile_scope_func_param);
+        }
+
+        assert(pns->nodes[2] == 0); // 2 is something...
+
+        compile_node(comp, pns->nodes[3]); // 3 is function body
+        // emit return if it wasn't the last opcode
+        if (!emit_last_emit_was_return_value(comp->emit)) {
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+            EMIT(return_value);
+        }
+    } else if (scope->kind == SCOPE_LAMBDA) {
+        assert(PY_PARSE_NODE_IS_STRUCT(scope->pn));
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn;
+        assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 3);
+
+        // work out number of parameters, keywords and default parameters, and add them to the id_info array
+        if (comp->pass == PASS_1) {
+            comp->have_bare_star = false;
+            apply_to_single_or_list(comp, pns->nodes[0], PN_varargslist, compile_scope_lambda_param);
+        }
+
+        // the value of the lambda body is what gets returned
+        compile_node(comp, pns->nodes[1]); // 1 is lambda body
+        EMIT(return_value);
+    } else if (scope->kind == SCOPE_LIST_COMP || scope->kind == SCOPE_DICT_COMP || scope->kind == SCOPE_SET_COMP || scope->kind == SCOPE_GEN_EXPR) {
+        // a bit of a hack at the moment
+
+        // scope->pn has the same shape that compile_comprehension asserts:
+        // node 0 is the inner expression, node 1 is the PN_comp_for node
+        assert(PY_PARSE_NODE_IS_STRUCT(scope->pn));
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn;
+        assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 2);
+        assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for));
+        py_parse_node_struct_t *pns_comp_for = (py_parse_node_struct_t*)pns->nodes[1];
+
+        // the single argument of the comprehension is registered under the
+        // name ".0" as a local; it is loaded below as the thing to iterate
+        qstr qstr_arg = qstr_from_strn_copy(".0", 2);
+        if (comp->pass == PASS_1) {
+            bool added;
+            id_info_t *id_info = scope_find_or_add_id(comp->scope_cur, qstr_arg, &added);
+            assert(added);
+            id_info->kind = ID_INFO_KIND_LOCAL;
+            scope->num_params = 1;
+        }
+
+        // start with an empty container (nothing is built for a generator expression)
+        if (scope->kind == SCOPE_LIST_COMP) {
+            EMIT(build_list, 0);
+        } else if (scope->kind == SCOPE_DICT_COMP) {
+            EMIT(build_map, 0);
+        } else if (scope->kind == SCOPE_SET_COMP) {
+            EMIT(build_set, 0);
+        }
+
+        // outermost for loop; nested if/for clauses and the inner expression
+        // are compiled by compile_scope_comp_iter
+        int l_end = EMIT(label_new);
+        int l_top = EMIT(label_new);
+        emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, qstr_arg);
+        EMIT(label_assign, l_top);
+        EMIT(for_iter, l_end);
+        c_assign(comp, pns_comp_for->nodes[0], ASSIGN_STORE);
+        compile_scope_comp_iter(comp, pns_comp_for->nodes[2], pns->nodes[0], l_top, 0);
+        EMIT(jump, l_top);
+        EMIT(label_assign, l_end);
+        EMIT(for_iter_end);
+
+        // a generator expression returns None; the other kinds emit
+        // return_value without loading anything more (presumably returning the
+        // container built above -- confirm against the emitter)
+        if (scope->kind == SCOPE_GEN_EXPR) {
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+        }
+        EMIT(return_value);
+    } else {
+        assert(scope->kind == SCOPE_CLASS);
+        assert(PY_PARSE_NODE_IS_STRUCT(scope->pn));
+        py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn;
+        assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_classdef);
+
+        // first pass: add __class__ and __locals__ to the class scope as locals
+        if (comp->pass == PASS_1) {
+            bool added;
+            id_info_t *id_info = scope_find_or_add_id(scope, comp->qstr___class__, &added);
+            assert(added);
+            id_info->kind = ID_INFO_KIND_LOCAL;
+            id_info = scope_find_or_add_id(scope, comp->qstr___locals__, &added);
+            assert(added);
+            id_info->kind = ID_INFO_KIND_LOCAL;
+            id_info->param = true;
+            scope->num_params = 1; // __locals__ is the parameter
+        }
+
+        // class prologue: store the locals, then set __module__ from __name__
+        // and __qualname__ from the class name
+        emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr___locals__);
+        EMIT(store_locals);
+        emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr___name__);
+        emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___module__);
+        EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); // 0 is class name
+        emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___qualname__);
+
+        check_for_doc_string(comp, pns->nodes[2]);
+        compile_node(comp, pns->nodes[2]); // 2 is class body
+
+        // return None if __class__ is still a plain local, otherwise return
+        // the closure for __class__
+        id_info_t *id = scope_find(scope, comp->qstr___class__);
+        assert(id != NULL);
+        if (id->kind == ID_INFO_KIND_LOCAL) {
+            EMIT(load_const_tok, PY_TOKEN_KW_NONE);
+        } else {
+            EMIT(load_closure, comp->qstr___class__);
+        }
+        EMIT(return_value);
+    }
+
+    emit_end_pass(comp->emit);
+}
+
+// Post-process a scope after its identifiers are known: assign local numbers,
+// turn implicit globals into explicit ones in function-like scopes, and
+// compute the scope flags.
+void compile_scope_compute_things(compiler_t *comp, scope_t *scope) {
+    const bool is_class = (scope->kind == SCOPE_CLASS);
+    const bool in_function_range = (scope->kind >= SCOPE_FUNCTION && scope->kind <= SCOPE_GEN_EXPR);
+
+    // in functions, turn implicit globals into explicit globals
+    // compute num_locals, and the index of each local
+    scope->num_locals = 0;
+    for (int idx = 0; idx < scope->id_info_len; idx++) {
+        id_info_t *info = &scope->id_info[idx];
+        if (is_class && info->qstr == comp->qstr___class__) {
+            // __class__ is not counted as a local; if it's used then it becomes a ID_INFO_KIND_CELL
+            continue;
+        }
+        if (in_function_range && info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) {
+            info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+        }
+        if (info->param || info->kind == ID_INFO_KIND_LOCAL) {
+            info->local_num = scope->num_locals;
+            scope->num_locals += 1;
+        }
+    }
+
+    // compute flags; existing flags are kept since some are set while
+    // processing the parameters
+    if (scope->kind != SCOPE_MODULE) {
+        scope->flags |= SCOPE_FLAG_NEWLOCALS;
+    }
+    if (scope->kind == SCOPE_FUNCTION || scope->kind == SCOPE_LAMBDA || scope->kind == SCOPE_LIST_COMP || scope->kind == SCOPE_DICT_COMP || scope->kind == SCOPE_SET_COMP || scope->kind == SCOPE_GEN_EXPR) {
+        assert(scope->parent != NULL);
+        scope->flags |= SCOPE_FLAG_OPTIMISED;
+
+        // TODO possibly other ways it can be nested
+        bool parent_is_function = (scope->parent->kind == SCOPE_FUNCTION);
+        if (parent_is_function || (scope->parent->kind == SCOPE_CLASS && scope->parent->parent->kind == SCOPE_FUNCTION)) {
+            scope->flags |= SCOPE_FLAG_NESTED;
+        }
+    }
+
+    // NOFREE is set when no identifier is a cell or free variable
+    bool has_free = false;
+    for (int idx = 0; idx < scope->id_info_len && !has_free; idx++) {
+        id_info_t *info = &scope->id_info[idx];
+        if (info->kind == ID_INFO_KIND_CELL || info->kind == ID_INFO_KIND_FREE) {
+            has_free = true;
+        }
+    }
+    if (!has_free) {
+        scope->flags |= SCOPE_FLAG_NOFREE;
+    }
+}
+
+// Compile a complete parse tree.  The tree is constant-folded, a module scope
+// is created for it, and then every scope in the compiler's scope list is
+// processed: pass 1 for all scopes, then the per-scope computations, then
+// passes 2 and 3 scope by scope.  The compiler state is freed at the end.
+void py_compile(py_parse_node_t pn) {
+    compiler_t *comp = m_new(compiler_t, 1);
+
+    // intern the special names used throughout the compiler
+    comp->qstr___class__ = qstr_from_strn_copy("__class__", 9);
+    comp->qstr___locals__ = qstr_from_strn_copy("__locals__", 10);
+    comp->qstr___name__ = qstr_from_strn_copy("__name__", 8);
+    comp->qstr___module__ = qstr_from_strn_copy("__module__", 10);
+    comp->qstr___qualname__ = qstr_from_strn_copy("__qualname__", 12);
+    comp->qstr___doc__ = qstr_from_strn_copy("__doc__", 7);
+    comp->qstr_assertion_error = qstr_from_strn_copy("AssertionError", 14);
+
+    // initial compiler state
+    comp->scope_head = NULL;
+    comp->scope_cur = NULL;
+    comp->break_label = 0;
+    comp->continue_label = 0;
+    comp->except_nest_level = 0;
+
+    comp->emit = emit_new(comp->qstr___class__);
+
+    // the module scope wraps the constant-folded tree
+    pn = fold_constants(pn);
+    scope_new_and_link(comp, SCOPE_MODULE, pn);
+
+    scope_t *scope;
+
+    // pass 1 over every scope; the list is walked via ->next after each scope
+    // has been compiled (compile_comprehension creates scopes during pass 1)
+    for (scope = comp->scope_head; scope != NULL; scope = scope->next) {
+        compile_scope(comp, scope, PASS_1);
+    }
+
+    // per-scope locals and flags
+    for (scope = comp->scope_head; scope != NULL; scope = scope->next) {
+        compile_scope_compute_things(comp, scope);
+    }
+
+    // passes 2 and 3 are run back to back for each scope
+    for (scope = comp->scope_head; scope != NULL; scope = scope->next) {
+        compile_scope(comp, scope, PASS_2);
+        compile_scope(comp, scope, PASS_3);
+    }
+
+    m_free(comp);
+}
diff --git a/py/compile.h b/py/compile.h
new file mode 100644
index 0000000..339acca
--- /dev/null
+++ b/py/compile.h
@@ -0,0 +1 @@
+void py_compile(py_parse_node_t pn); // compile a whole parse tree: fold constants, then run passes 1-3 over every scope
diff --git a/py/emit.h b/py/emit.h
new file mode 100644
index 0000000..8cad745
--- /dev/null
+++ b/py/emit.h
@@ -0,0 +1,120 @@
+//#define EMIT_DO_CPY
+#define EMIT_DO_BC
+//#define EMIT_DO_X64
+//#define EMIT_DO_THUMB
+
+/* Notes on passes:
+ * We don't know exactly the opcodes in pass 1 because they depend on the
+ * closing over of variables (LOAD_CLOSURE, BUILD_TUPLE, MAKE_CLOSURE), which
+ * depends on determining the scope of variables in each function, and this
+ * is not known until the end of pass 1.
+ * As a consequence, we don't know the maximum stack size until the end of pass 2.
+ * This is problematic for some emitters (x64) since they need to know the maximum
+ * stack size to compile the entry to the function, and this affects code size.
+ */
+
+typedef enum {
+    PASS_1 = 1, // work out the ids and their kinds, and the number of labels
+    PASS_2 = 2, // work out the stack size, the code size and the label offsets
+    PASS_3 = 3, // emit the code
+} pass_kind_t;
+
+typedef struct _emitter_t emitter_t;
+
+void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr);
+void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr);
+void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr);
+void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr);
+void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr);
+
+emitter_t *emit_new();
+void emit_set_native_types(emitter_t *emit, bool do_native_types);
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope);
+void emit_end_pass(emitter_t *emit);
+bool emit_last_emit_was_return_value(emitter_t *emit);
+int emit_get_stack_size(emitter_t *emit);
+void emit_set_stack_size(emitter_t *emit, int size);
+
+int emit_label_new(emitter_t *emit);
+void emit_label_assign(emitter_t *emit, int l);
+void emit_import_name(emitter_t *emit, qstr qstr);
+void emit_import_from(emitter_t *emit, qstr qstr);
+void emit_import_star(emitter_t *emit);
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok);
+void emit_load_const_small_int(emitter_t *emit, int arg);
+void emit_load_const_int(emitter_t *emit, qstr qstr);
+void emit_load_const_dec(emitter_t *emit, qstr qstr);
+void emit_load_const_id(emitter_t *emit, qstr qstr);
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes);
+void emit_load_const_verbatim_start(emitter_t *emit);
+void emit_load_const_verbatim_int(emitter_t *emit, int val);
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str);
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len);
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes);
+void emit_load_const_verbatim_end(emitter_t *emit);
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num);
+void emit_load_name(emitter_t *emit, qstr qstr);
+void emit_load_global(emitter_t *emit, qstr qstr);
+void emit_load_deref(emitter_t *emit, qstr qstr);
+void emit_load_closure(emitter_t *emit, qstr qstr);
+void emit_load_attr(emitter_t *emit, qstr qstr);
+void emit_load_method(emitter_t *emit, qstr qstr);
+void emit_load_build_class(emitter_t *emit);
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num);
+void emit_store_name(emitter_t *emit, qstr qstr);
+void emit_store_global(emitter_t *emit, qstr qstr);
+void emit_store_deref(emitter_t *emit, qstr qstr);
+void emit_store_attr(emitter_t *emit, qstr qstr);
+void emit_store_locals(emitter_t *emit);
+void emit_store_subscr(emitter_t *emit);
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num);
+void emit_delete_name(emitter_t *emit, qstr qstr);
+void emit_delete_global(emitter_t *emit, qstr qstr);
+void emit_delete_deref(emitter_t *emit, qstr qstr);
+void emit_delete_attr(emitter_t *emit, qstr qstr);
+void emit_delete_subscr(emitter_t *emit);
+void emit_dup_top(emitter_t *emit);
+void emit_dup_top_two(emitter_t *emit);
+void emit_pop_top(emitter_t *emit);
+void emit_rot_two(emitter_t *emit);
+void emit_rot_three(emitter_t *emit);
+void emit_jump(emitter_t *emit, int label);
+void emit_pop_jump_if_true(emitter_t *emit, int label);
+void emit_pop_jump_if_false(emitter_t *emit, int label);
+void emit_jump_if_true_or_pop(emitter_t *emit, int label);
+void emit_jump_if_false_or_pop(emitter_t *emit, int label);
+void emit_setup_loop(emitter_t *emit, int label);
+void emit_break_loop(emitter_t *emit, int label);
+void emit_continue_loop(emitter_t *emit, int label);
+void emit_setup_with(emitter_t *emit, int label);
+void emit_with_cleanup(emitter_t *emit);
+void emit_setup_except(emitter_t *emit, int label);
+void emit_setup_finally(emitter_t *emit, int label);
+void emit_end_finally(emitter_t *emit);
+void emit_get_iter(emitter_t *emit); // tos = getiter(tos)
+void emit_for_iter(emitter_t *emit, int label);
+void emit_for_iter_end(emitter_t *emit);
+void emit_pop_block(emitter_t *emit);
+void emit_pop_except(emitter_t *emit);
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op);
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op);
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op);
+void emit_build_tuple(emitter_t *emit, int n_args);
+void emit_build_list(emitter_t *emit, int n_args);
+void emit_list_append(emitter_t *emit, int list_stack_index);
+void emit_build_map(emitter_t *emit, int n_args);
+void emit_store_map(emitter_t *emit);
+void emit_map_add(emitter_t *emit, int map_stack_index);
+void emit_build_set(emitter_t *emit, int n_args);
+void emit_set_add(emitter_t *emit, int set_stack_index);
+void emit_build_slice(emitter_t *emit, int n_args);
+void emit_unpack_sequence(emitter_t *emit, int n_args);
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right);
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params);
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params);
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg);
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg);
+void emit_return_value(emitter_t *emit);
+void emit_raise_varargs(emitter_t *emit, int n_args);
+void emit_yield_value(emitter_t *emit);
+void emit_yield_from(emitter_t *emit);
diff --git a/py/emitbc.c b/py/emitbc.c
new file mode 100644
index 0000000..9d159ae
--- /dev/null
+++ b/py/emitbc.c
@@ -0,0 +1,692 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "compile.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+#include "bc.h"
+
+#ifdef EMIT_DO_BC
+
+struct _emitter_t {
+    int pass; // pass currently running (set by emit_start_pass)
+    int next_label; // next label id handed out by emit_label_new
+    int stack_size; // tracked stack depth (only updated after PASS_1)
+    bool last_emit_was_return_value; // set by emit_return_value, cleared by emit_pre
+
+    scope_t *scope; // scope passed to emit_start_pass for the current pass
+
+    int max_num_labels; // largest label count seen at the end of a PASS_1
+    uint *label_offsets; // code offset of each label, recorded in PASS_2
+
+    uint code_offset; // running byte offset within the current pass
+    uint code_size; // total code size, known at the end of PASS_2
+    byte *code_base; // output buffer, allocated at the end of PASS_2
+    byte dummy_data[8]; // scratch target for writes made before PASS_3
+};
+
+emitter_t *emit_new() {
+    emitter_t *emit = m_new(emitter_t, 1);
+    emit->max_num_labels = 0; // raised at the end of PASS_1 when more labels were used
+    emit->label_offsets = NULL; // allocated on demand by emit_start_pass
+    emit->code_offset = 0;
+    emit->code_size = 0;
+    emit->code_base = NULL; // allocated at the end of PASS_2
+    return emit; // pass, label, stack and scope fields are set by emit_start_pass
+}
+
+uint emit_get_code_size(emitter_t* emit) { // size in bytes, computed at the end of PASS_2
+    return emit->code_size;
+}
+
+void* emit_get_code(emitter_t* emit) { // buffer allocated at the end of PASS_2 (NULL in a fresh emitter)
+    return emit->code_base;
+}
+
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
+    emit->scope = scope;
+    emit->pass = pass;
+    emit->code_offset = 0;
+    emit->stack_size = 0;
+    emit->next_label = 1; // label numbering restarts at 1 in every pass
+    emit->last_emit_was_return_value = false;
+    if (pass == PASS_1) {
+        scope->unique_code_id = rt_get_new_unique_code_id(); // the code id is obtained once, in PASS_1
+        return;
+    }
+    if (pass > PASS_1 && emit->label_offsets == NULL) {
+        emit->label_offsets = m_new(uint, emit->max_num_labels); // created on first use, then reused
+    }
+    if (pass == PASS_2) {
+        memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(uint)); // all bits set marks a label as unassigned
+    }
+}
+
+void emit_end_pass(emitter_t *emit) {
+    // a balanced pass leaves the tracked stack depth at zero
+    if (emit->stack_size != 0) {
+        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
+    }
+    switch (emit->pass) {
+        case PASS_1: // remember the largest number of labels needed so far
+            if (emit->max_num_labels < emit->next_label) {
+                emit->max_num_labels = emit->next_label;
+            }
+            break;
+        case PASS_2: // the final offset is the code size; allocate the buffer that PASS_3 fills
+            emit->code_size = emit->code_offset;
+            emit->code_base = m_new(byte, emit->code_size);
+            printf("code_size: %u\n", emit->code_size);
+            break;
+        case PASS_3: // hand the finished code to the runtime under this scope's code id
+            rt_assign_byte_code(emit->scope->unique_code_id, emit->code_base, emit->code_size, emit->scope->num_params);
+            break;
+        default: break;
+    }
+}
+
+// every emitted byte is reserved through this function; it returns where to store the bytes and advances code_offset
+static byte* emit_get_cur_to_write_bytes(emitter_t* emit, int num_bytes_to_write) {
+    uint start = emit->code_offset;
+    if (emit->pass >= PASS_3) {
+        // real emission: the reservation must stay inside the buffer sized in PASS_2
+        assert(start + num_bytes_to_write <= emit->code_size);
+        emit->code_offset = start + num_bytes_to_write;
+        return emit->code_base + start;
+    }
+    // earlier passes only count the bytes; the caller's writes land in a scratch area
+    emit->code_offset = start + num_bytes_to_write;
+    return emit->dummy_data;
+}
+
+static void emit_write_byte_1(emitter_t* emit, byte b1) { // emits the single byte b1
+    byte* c = emit_get_cur_to_write_bytes(emit, 1);
+    c[0] = b1;
+}
+
+static void emit_write_byte_1_byte(emitter_t* emit, byte b1, uint b2) { // emits b1 followed by a one-byte argument
+    assert((b2 & (~0xff)) == 0); // the argument must fit in 8 bits
+    byte* c = emit_get_cur_to_write_bytes(emit, 2);
+    c[0] = b1;
+    c[1] = b2;
+}
+
+static void emit_write_byte_1_int(emitter_t* emit, byte b1, int num) { // emits b1 followed by a signed 16-bit argument
+    assert((num & (~0x7fff)) == 0 || (num & (~0x7fff)) == (~0x7fff)); // num must fit in a signed 16-bit value
+    byte* c = emit_get_cur_to_write_bytes(emit, 3);
+    c[0] = b1;
+    c[1] = (uint)num & 0xff; // low byte first
+    c[2] = ((uint)num >> 8) & 0xff; // shift the unsigned value: >> on a negative int is implementation-defined
+}
+
+static void emit_write_byte_1_uint(emitter_t* emit, byte b1, uint num) {
+    // emits b1 followed by an unsigned argument that takes one or two bytes
+    if (num > 16383) { // does not fit in 0x3fff
+        // larger numbers not implemented/supported
+        assert(0);
+        return;
+    }
+    bool two_byte_arg = num > 127; // anything above 0x7f needs the long form
+    byte* out = emit_get_cur_to_write_bytes(emit, two_byte_arg ? 3 : 2);
+    *out++ = b1;
+    if (two_byte_arg) {
+        // long form: the high bits come first, with bit 7 set as the marker
+        *out++ = (num >> 8) | 0x80;
+    }
+    // the whole value in the short form, or the low byte of the long form
+    *out = num;
+}
+
+static void emit_write_byte_1_qstr(emitter_t* emit, byte b1, qstr qstr) { // a qstr argument is written as a variable-length uint
+    emit_write_byte_1_uint(emit, b1, qstr);
+}
+
+static void emit_write_byte_1_label(emitter_t* emit, byte b1, int label) {
+    // always use the long (3-byte) form so the size counted before PASS_3 matches what PASS_3 writes
+    uint code_offset = emit->pass < PASS_3 ? 0 : emit->label_offsets[label]; // offsets are only read in PASS_3
+    assert(code_offset <= 16383); // 0x3fff, the same limit emit_write_byte_1_uint enforces
+    byte* c = emit_get_cur_to_write_bytes(emit, 3);
+    c[0] = b1;
+    c[1] = (code_offset >> 8) | 0x80; // bit 7 marks the long form, exactly as emit_write_byte_1_uint encodes it
+    c[2] = code_offset;
+}
+
+bool emit_last_emit_was_return_value(emitter_t *emit) { // flag is set by emit_return_value and cleared by emit_pre
+    return emit->last_emit_was_return_value;
+}
+
+int emit_get_stack_size(emitter_t *emit) { // current tracked stack depth
+    return emit->stack_size;
+}
+
+void emit_set_stack_size(emitter_t *emit, int size) { // overrides the tracked stack depth
+    if (emit->pass > PASS_1) { // ignored in PASS_1, where emit_pre does not track the depth either
+        emit->stack_size = size;
+    }
+}
+
+static void emit_pre(emitter_t *emit, int stack_size_delta) {
+    emit->last_emit_was_return_value = false; // whatever gets emitted next clears the flag first
+    if (emit->pass > PASS_1) { // the stack depth is only tracked after the first pass
+        emit->stack_size += stack_size_delta;
+        if (emit->scope->stack_size < emit->stack_size) {
+            emit->scope->stack_size = emit->stack_size; // the scope keeps the deepest stack seen
+        }
+    }
+}
+
+int emit_label_new(emitter_t *emit) { // hands out consecutive label ids; emit_start_pass restarts them at 1
+    return emit->next_label++;
+}
+
+void emit_label_assign(emitter_t *emit, int l) {
+    // binds label l to the current code offset
+    emit_pre(emit, 0);
+    if (emit->pass <= PASS_1) {
+        return; // no offsets are recorded in the first pass
+    }
+    assert(l < emit->max_num_labels);
+    uint *slot = &emit->label_offsets[l];
+    if (emit->pass == PASS_2) {
+        assert(*slot == -1); // the slot must still hold the unassigned marker
+        *slot = emit->code_offset;
+    } else if (emit->pass == PASS_3) {
+        assert(*slot == emit->code_offset); // the offset must not move between PASS_2 and PASS_3
+    }
+}
+
+void emit_import_name(emitter_t *emit, qstr qstr) { // PYBC_IMPORT_NAME + qstr; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_qstr(emit, PYBC_IMPORT_NAME, qstr);
+}
+
+void emit_import_from(emitter_t *emit, qstr qstr) { // PYBC_IMPORT_FROM + qstr; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1_qstr(emit, PYBC_IMPORT_FROM, qstr);
+}
+
+void emit_import_star(emitter_t *emit) { // PYBC_IMPORT_STAR; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1(emit, PYBC_IMPORT_STAR);
+}
+
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { // one-byte constant opcode chosen by token; stack delta +1
+    emit_pre(emit, 1);
+    switch (tok) {
+        case PY_TOKEN_KW_FALSE: emit_write_byte_1(emit, PYBC_LOAD_CONST_FALSE); break;
+        case PY_TOKEN_KW_NONE: emit_write_byte_1(emit, PYBC_LOAD_CONST_NONE); break;
+        case PY_TOKEN_KW_TRUE: emit_write_byte_1(emit, PYBC_LOAD_CONST_TRUE); break;
+        default: assert(0); // no other token kinds are handled
+    }
+}
+
+void emit_load_const_small_int(emitter_t *emit, int arg) { // PYBC_LOAD_CONST_SMALL_INT + 16-bit value; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1_int(emit, PYBC_LOAD_CONST_SMALL_INT, arg); // the writer asserts that arg fits in 16 signed bits
+}
+
+void emit_load_const_int(emitter_t *emit, qstr qstr) { // PYBC_LOAD_CONST_INT + qstr; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_INT, qstr);
+}
+
+void emit_load_const_dec(emitter_t *emit, qstr qstr) { // PYBC_LOAD_CONST_DEC + qstr; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_DEC, qstr);
+}
+
+void emit_load_const_id(emitter_t *emit, qstr qstr) { // PYBC_LOAD_CONST_ID + qstr; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_ID, qstr);
+}
+
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
+    // Stack delta +1.  The bytes flag selects between the bytes opcode
+    // and the string opcode; either way the qstr follows the opcode as
+    // its argument.
+    emit_pre(emit, 1);
+    int opcode = bytes ? PYBC_LOAD_CONST_BYTES : PYBC_LOAD_CONST_STRING;
+    emit_write_byte_1_qstr(emit, opcode, qstr);
+}
+
+void emit_load_const_verbatim_start(emitter_t *emit) { // not implemented in this emitter
+    emit_pre(emit, 1);
+    assert(0);
+}
+
+void emit_load_const_verbatim_int(emitter_t *emit, int val) { // not implemented in this emitter
+    assert(0);
+}
+
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str) { // not implemented in this emitter
+    assert(0);
+}
+
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { // not implemented in this emitter
+    assert(0);
+}
+
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { // not implemented in this emitter
+    assert(0);
+}
+
+void emit_load_const_verbatim_end(emitter_t *emit) { // not implemented in this emitter
+    assert(0);
+}
+
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { // load of local number local_num; stack delta +1
+    assert(local_num >= 0);
+    emit_pre(emit, 1);
+    switch (local_num) { // locals 0..2 have dedicated one-byte opcodes
+        case 0: emit_write_byte_1(emit, PYBC_LOAD_FAST_0); break;
+        case 1: emit_write_byte_1(emit, PYBC_LOAD_FAST_1); break;
+        case 2: emit_write_byte_1(emit, PYBC_LOAD_FAST_2); break;
+        default: emit_write_byte_1_uint(emit, PYBC_LOAD_FAST_N, local_num); break; // generic form carries the index
+    }
+}
+
+void emit_load_name(emitter_t *emit, qstr qstr) { // PYBC_LOAD_NAME + qstr; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1_qstr(emit, PYBC_LOAD_NAME, qstr);
+}
+
+void emit_load_global(emitter_t *emit, qstr qstr) { // PYBC_LOAD_GLOBAL + qstr; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1_qstr(emit, PYBC_LOAD_GLOBAL, qstr);
+}
+
+void emit_load_deref(emitter_t *emit, qstr qstr) { // not implemented in this emitter: no byte code is written
+    emit_pre(emit, 1);
+    assert(0);
+}
+
+void emit_load_closure(emitter_t *emit, qstr qstr) { // not implemented in this emitter: no byte code is written
+    emit_pre(emit, 1);
+    assert(0);
+}
+
+void emit_load_attr(emitter_t *emit, qstr qstr) { // PYBC_LOAD_ATTR + qstr; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_qstr(emit, PYBC_LOAD_ATTR, qstr);
+}
+
+void emit_load_method(emitter_t *emit, qstr qstr) { // PYBC_LOAD_METHOD + qstr; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_qstr(emit, PYBC_LOAD_METHOD, qstr);
+}
+
+void emit_load_build_class(emitter_t *emit) { // PYBC_LOAD_BUILD_CLASS; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1(emit, PYBC_LOAD_BUILD_CLASS);
+}
+
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) { // store to local number local_num; stack delta -1
+    assert(local_num >= 0);
+    emit_pre(emit, -1);
+    switch (local_num) { // locals 0..2 have dedicated one-byte opcodes
+        case 0: emit_write_byte_1(emit, PYBC_STORE_FAST_0); break;
+        case 1: emit_write_byte_1(emit, PYBC_STORE_FAST_1); break;
+        case 2: emit_write_byte_1(emit, PYBC_STORE_FAST_2); break;
+        default: emit_write_byte_1_uint(emit, PYBC_STORE_FAST_N, local_num); break; // generic form carries the index
+    }
+}
+
+void emit_store_name(emitter_t *emit, qstr qstr) { // PYBC_STORE_NAME + qstr; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_qstr(emit, PYBC_STORE_NAME, qstr);
+}
+
+void emit_store_global(emitter_t *emit, qstr qstr) { // PYBC_STORE_GLOBAL + qstr; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_qstr(emit, PYBC_STORE_GLOBAL, qstr);
+}
+
+void emit_store_deref(emitter_t *emit, qstr qstr) { // not implemented in this emitter: no byte code is written
+    emit_pre(emit, -1);
+    assert(0);
+}
+
+void emit_store_attr(emitter_t *emit, qstr qstr) { // PYBC_STORE_ATTR + qstr; stack delta -2
+    emit_pre(emit, -2);
+    emit_write_byte_1_qstr(emit, PYBC_STORE_ATTR, qstr);
+}
+
+void emit_store_locals(emitter_t *emit) { // PYBC_STORE_LOCALS; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1(emit, PYBC_STORE_LOCALS);
+}
+
+void emit_store_subscr(emitter_t *emit) { // PYBC_STORE_SUBSCR; stack delta -3
+    emit_pre(emit, -3);
+    emit_write_byte_1(emit, PYBC_STORE_SUBSCR);
+}
+
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { // PYBC_DELETE_FAST_N + local index; stack delta 0
+    assert(local_num >= 0);
+    emit_pre(emit, 0);
+    emit_write_byte_1_uint(emit, PYBC_DELETE_FAST_N, local_num); // unlike load/store there are no dedicated 0..2 forms
+}
+
+void emit_delete_name(emitter_t *emit, qstr qstr) { // PYBC_DELETE_NAME + qstr; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_qstr(emit, PYBC_DELETE_NAME, qstr);
+}
+
+void emit_delete_global(emitter_t *emit, qstr qstr) { // PYBC_DELETE_GLOBAL + qstr; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_qstr(emit, PYBC_DELETE_GLOBAL, qstr);
+}
+
+void emit_delete_deref(emitter_t *emit, qstr qstr) { // PYBC_DELETE_DEREF + qstr; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_qstr(emit, PYBC_DELETE_DEREF, qstr);
+}
+
+void emit_delete_attr(emitter_t *emit, qstr qstr) { // PYBC_DELETE_ATTR + qstr; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_qstr(emit, PYBC_DELETE_ATTR, qstr);
+}
+
+void emit_delete_subscr(emitter_t *emit) { // PYBC_DELETE_SUBSCR; stack delta -2
+    emit_pre(emit, -2);
+    emit_write_byte_1(emit, PYBC_DELETE_SUBSCR);
+}
+
+void emit_dup_top(emitter_t *emit) { // PYBC_DUP_TOP; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1(emit, PYBC_DUP_TOP);
+}
+
+void emit_dup_top_two(emitter_t *emit) { // PYBC_DUP_TOP_TWO; stack delta +2
+    emit_pre(emit, 2);
+    emit_write_byte_1(emit, PYBC_DUP_TOP_TWO);
+}
+
+void emit_pop_top(emitter_t *emit) { // PYBC_POP_TOP; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1(emit, PYBC_POP_TOP);
+}
+
+void emit_rot_two(emitter_t *emit) { // PYBC_ROT_TWO; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1(emit, PYBC_ROT_TWO);
+}
+
+void emit_rot_three(emitter_t *emit) { // PYBC_ROT_THREE; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1(emit, PYBC_ROT_THREE);
+}
+
+void emit_jump(emitter_t *emit, int label) { // PYBC_JUMP + label offset; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_label(emit, PYBC_JUMP, label);
+}
+
+void emit_pop_jump_if_true(emitter_t *emit, int label) { // PYBC_POP_JUMP_IF_TRUE + label offset; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_TRUE, label);
+}
+
+void emit_pop_jump_if_false(emitter_t *emit, int label) { // PYBC_POP_JUMP_IF_FALSE + label offset; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_FALSE, label);
+}
+
+void emit_jump_if_true_or_pop(emitter_t *emit, int label) { // PYBC_JUMP_IF_TRUE_OR_POP + label offset; counted as stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_label(emit, PYBC_JUMP_IF_TRUE_OR_POP, label);
+}
+
+void emit_jump_if_false_or_pop(emitter_t *emit, int label) { // PYBC_JUMP_IF_FALSE_OR_POP + label offset; counted as stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_label(emit, PYBC_JUMP_IF_FALSE_OR_POP, label);
+}
+
+void emit_setup_loop(emitter_t *emit, int label) { // PYBC_SETUP_LOOP + label offset; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_label(emit, PYBC_SETUP_LOOP, label);
+}
+
+void emit_break_loop(emitter_t *emit, int label) { // PYBC_BREAK_LOOP + label offset; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_label(emit, PYBC_BREAK_LOOP, label);
+}
+
+void emit_continue_loop(emitter_t *emit, int label) { // PYBC_CONTINUE_LOOP + label offset; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_label(emit, PYBC_CONTINUE_LOOP, label);
+}
+
+void emit_setup_with(emitter_t *emit, int label) { // PYBC_SETUP_WITH + label offset; stack delta +7 (undone by emit_with_cleanup)
+    emit_pre(emit, 7);
+    emit_write_byte_1_label(emit, PYBC_SETUP_WITH, label);
+}
+
+void emit_with_cleanup(emitter_t *emit) { // PYBC_WITH_CLEANUP; stack delta -7 (mirrors emit_setup_with)
+    emit_pre(emit, -7);
+    emit_write_byte_1(emit, PYBC_WITH_CLEANUP);
+}
+
+void emit_setup_except(emitter_t *emit, int label) { // PYBC_SETUP_EXCEPT + label offset; stack delta +6
+    emit_pre(emit, 6);
+    emit_write_byte_1_label(emit, PYBC_SETUP_EXCEPT, label);
+}
+
+void emit_setup_finally(emitter_t *emit, int label) { // PYBC_SETUP_FINALLY + label offset; stack delta +6
+    emit_pre(emit, 6);
+    emit_write_byte_1_label(emit, PYBC_SETUP_FINALLY, label);
+}
+
+void emit_end_finally(emitter_t *emit) { // PYBC_END_FINALLY; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1(emit, PYBC_END_FINALLY);
+}
+
+void emit_get_iter(emitter_t *emit) { // PYBC_GET_ITER; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1(emit, PYBC_GET_ITER);
+}
+
+void emit_for_iter(emitter_t *emit, int label) { // PYBC_FOR_ITER + label offset; stack delta +1
+    emit_pre(emit, 1);
+    emit_write_byte_1_label(emit, PYBC_FOR_ITER, label);
+}
+
+void emit_for_iter_end(emitter_t *emit) { // writes no byte code; only lowers the tracked stack depth by one
+    emit_pre(emit, -1);
+}
+
+void emit_pop_block(emitter_t *emit) { // PYBC_POP_BLOCK; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1(emit, PYBC_POP_BLOCK);
+}
+
+void emit_pop_except(emitter_t *emit) { // PYBC_POP_EXCEPT; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1(emit, PYBC_POP_EXCEPT);
+}
+
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { // PYBC_UNARY_OP + one-byte operator code; stack delta 0
+    emit_pre(emit, 0);
+    emit_write_byte_1_byte(emit, PYBC_UNARY_OP, op);
+}
+
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { // PYBC_BINARY_OP + one-byte operator code; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_byte(emit, PYBC_BINARY_OP, op);
+}
+
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { // PYBC_COMPARE_OP + one-byte operator code; stack delta -1
+    emit_pre(emit, -1);
+    emit_write_byte_1_byte(emit, PYBC_COMPARE_OP, op);
+}
+
+void emit_build_tuple(emitter_t *emit, int n_args) { // PYBC_BUILD_TUPLE + count; stack delta 1 - n_args
+    assert(n_args >= 0);
+    emit_pre(emit, 1 - n_args);
+    emit_write_byte_1_uint(emit, PYBC_BUILD_TUPLE, n_args);
+}
+
+void emit_build_list(emitter_t *emit, int n_args) { // PYBC_BUILD_LIST + count; stack delta 1 - n_args
+    assert(n_args >= 0);
+    emit_pre(emit, 1 - n_args);
+    emit_write_byte_1_uint(emit, PYBC_BUILD_LIST, n_args);
+}
+
+void emit_list_append(emitter_t *emit, int list_stack_index) { // PYBC_LIST_APPEND + stack index; stack delta -1
+    assert(list_stack_index >= 0);
+    emit_pre(emit, -1);
+    emit_write_byte_1_uint(emit, PYBC_LIST_APPEND, list_stack_index);
+}
+
+void emit_build_map(emitter_t *emit, int n_args) { // PYBC_BUILD_MAP + count; stack delta +1 whatever n_args is
+    assert(n_args >= 0);
+    emit_pre(emit, 1);
+    emit_write_byte_1_uint(emit, PYBC_BUILD_MAP, n_args);
+}
+
+void emit_store_map(emitter_t *emit) { // PYBC_STORE_MAP; stack delta -2
+    emit_pre(emit, -2);
+    emit_write_byte_1(emit, PYBC_STORE_MAP);
+}
+
+void emit_map_add(emitter_t *emit, int map_stack_index) { // PYBC_MAP_ADD + stack index; stack delta -2
+    assert(map_stack_index >= 0);
+    emit_pre(emit, -2);
+    emit_write_byte_1_uint(emit, PYBC_MAP_ADD, map_stack_index);
+}
+
+void emit_build_set(emitter_t *emit, int n_args) { // PYBC_BUILD_SET + count; stack delta 1 - n_args
+    assert(n_args >= 0);
+    emit_pre(emit, 1 - n_args);
+    emit_write_byte_1_uint(emit, PYBC_BUILD_SET, n_args);
+}
+
+void emit_set_add(emitter_t *emit, int set_stack_index) { // PYBC_SET_ADD + stack index; stack delta -1
+    assert(set_stack_index >= 0);
+    emit_pre(emit, -1);
+    emit_write_byte_1_uint(emit, PYBC_SET_ADD, set_stack_index);
+}
+
+void emit_build_slice(emitter_t *emit, int n_args) { // PYBC_BUILD_SLICE + count; stack delta 1 - n_args
+    assert(n_args >= 0);
+    emit_pre(emit, 1 - n_args);
+    emit_write_byte_1_uint(emit, PYBC_BUILD_SLICE, n_args);
+}
+
+void emit_unpack_sequence(emitter_t *emit, int n_args) { // PYBC_UNPACK_SEQUENCE + count; stack delta n_args - 1
+    assert(n_args >= 0);
+    emit_pre(emit, -1 + n_args);
+    emit_write_byte_1_uint(emit, PYBC_UNPACK_SEQUENCE, n_args);
+}
+
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { // PYBC_UNPACK_EX; stack delta n_left + n_right
+    assert(n_left >=0 && n_right >= 0);
+    emit_pre(emit, -1 + n_left + n_right + 1);
+    emit_write_byte_1_uint(emit, PYBC_UNPACK_EX, n_left | (n_right << 8)); // both counts packed into one uint: n_left low, n_right from bit 8
+}
+
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { // PYBC_MAKE_FUNCTION + code id; stack delta +1
+    assert(n_default_params == 0 && n_dict_params == 0); // default and dict parameters are not supported here
+    emit_pre(emit, 1);
+    emit_write_byte_1_uint(emit, PYBC_MAKE_FUNCTION, scope->unique_code_id); // the id assigned to the scope in PASS_1
+}
+
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { // not implemented in this emitter
+    assert(0); // closures are rejected up front; the code below writes no byte code
+    emit_pre(emit, -2 - n_default_params - 2 * n_dict_params);
+    if (emit->pass == PASS_3) {
+        printf("MAKE_CLOSURE %d\n", (n_dict_params << 8) | n_default_params);
+    }
+}
+
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    // Stack delta: the tracked depth drops by one slot per positional
+    // argument, two per keyword argument, and one more for each of the
+    // star / double-star arguments that is present.
+    int n_extra = 0;
+    if (have_star_arg) {
+        n_extra++;
+    }
+    if (have_dbl_star_arg) {
+        n_extra++;
+    }
+    emit_pre(emit, -n_positional - 2 * n_keyword - n_extra);
+    // the presence of star / double-star arguments selects the opcode variant
+    int opcode;
+    if (have_star_arg && have_dbl_star_arg) {
+        opcode = PYBC_CALL_FUNCTION_VAR_KW;
+    } else if (have_star_arg) {
+        opcode = PYBC_CALL_FUNCTION_VAR;
+    } else if (have_dbl_star_arg) {
+        opcode = PYBC_CALL_FUNCTION_KW;
+    } else {
+        opcode = PYBC_CALL_FUNCTION;
+    }
+    emit_write_byte_1_uint(emit, opcode, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints
+}
+
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    // Stack delta: the tracked depth drops by one slot per positional
+    // argument, two per keyword argument, and one more for each of the
+    // star / double-star arguments that is present.
+    int n_extra = 0;
+    if (have_star_arg) {
+        n_extra++;
+    }
+    if (have_dbl_star_arg) {
+        n_extra++;
+    }
+    emit_pre(emit, -n_positional - 2 * n_keyword - n_extra);
+    // the presence of star / double-star arguments selects the opcode variant
+    int opcode;
+    if (have_star_arg && have_dbl_star_arg) {
+        opcode = PYBC_CALL_METHOD_VAR_KW;
+    } else if (have_star_arg) {
+        opcode = PYBC_CALL_METHOD_VAR;
+    } else if (have_dbl_star_arg) {
+        opcode = PYBC_CALL_METHOD_KW;
+    } else {
+        opcode = PYBC_CALL_METHOD;
+    }
+    emit_write_byte_1_uint(emit, opcode, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints
+}
+
+void emit_return_value(emitter_t *emit) { // PYBC_RETURN_VALUE; stack delta -1
+    emit_pre(emit, -1);
+    emit->last_emit_was_return_value = true; // must follow emit_pre, which clears the flag
+    emit_write_byte_1(emit, PYBC_RETURN_VALUE);
+}
+
+void emit_raise_varargs(emitter_t *emit, int n_args) { // PYBC_RAISE_VARARGS + count; stack delta -n_args
+    assert(n_args >= 0);
+    emit_pre(emit, -n_args);
+    emit_write_byte_1_uint(emit, PYBC_RAISE_VARARGS, n_args);
+}
+
+void emit_yield_value(emitter_t *emit) { // PYBC_YIELD_VALUE; stack delta 0
+    emit_pre(emit, 0);
+    if (emit->pass == PASS_2) {
+        emit->scope->flags |= SCOPE_FLAG_GENERATOR; // a yield marks the enclosing scope as a generator
+    }
+    emit_write_byte_1(emit, PYBC_YIELD_VALUE);
+}
+
+void emit_yield_from(emitter_t *emit) { // PYBC_YIELD_FROM; stack delta -1
+    emit_pre(emit, -1);
+    if (emit->pass == PASS_2) {
+        emit->scope->flags |= SCOPE_FLAG_GENERATOR; // a yield from marks the enclosing scope as a generator
+    }
+    emit_write_byte_1(emit, PYBC_YIELD_FROM);
+}
+
+#endif // EMIT_DO_BC
diff --git a/py/emitcommon.c b/py/emitcommon.c
new file mode 100644
index 0000000..1fd8697
--- /dev/null
+++ b/py/emitcommon.c
@@ -0,0 +1,171 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+
+#define EMIT(fun, arg...) (emit_##fun(emit, ##arg)) // calls emit_<fun>(emit, ...); relies on a variable named emit at the call site
+
+void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr) {
+    if (pass != PASS_1) {
+        return; // declarations are only processed in the first pass
+    }
+    if (scope->kind == SCOPE_MODULE) {
+        printf("SyntaxError?: can't declare global in outer code\n");
+        return;
+    }
+    bool is_new;
+    id_info_t *local_entry = scope_find_or_add_id(scope, qstr, &is_new);
+    if (!is_new) {
+        printf("SyntaxError?: identifier already declared something\n");
+        return;
+    }
+    local_entry->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+    // when scope_find_global also finds the id, that entry becomes explicit too
+    id_info_t *global_entry = scope_find_global(scope, qstr);
+    if (global_entry != NULL) {
+        global_entry->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+    }
+}
+
+void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr) {
+    if (pass == PASS_1) { // declarations are only processed in the first pass
+        if (scope->kind == SCOPE_MODULE) {
+            printf("SyntaxError?: can't declare nonlocal in outer code\n");
+            return;
+        }
+        bool is_new;
+        id_info_t *entry = scope_find_or_add_id(scope, qstr, &is_new);
+        if (!is_new) {
+            printf("SyntaxError?: identifier already declared something\n");
+            return;
+        }
+        id_info_t *outer = scope_find_local_in_parent(scope, qstr); // the binding must be a local, cell or free id
+        if (outer == NULL || (outer->kind != ID_INFO_KIND_LOCAL && outer->kind != ID_INFO_KIND_CELL && outer->kind != ID_INFO_KIND_FREE)) {
+            printf("SyntaxError: no binding for nonlocal '%s' found\n", qstr_str(qstr));
+            return;
+        }
+        entry->kind = ID_INFO_KIND_FREE;
+        scope_close_over_in_parents(scope, qstr);
+    }
+}
+
+void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr) {
+    id_info_t *id_info = NULL;
+    if (pass == PASS_1) {
+        // name adding/lookup
+        bool added;
+        id_info = scope_find_or_add_id(scope, qstr, &added);
+        if (added) { // the id is new to this scope, so its kind is decided here
+            if (strcmp(qstr_str(qstr), "AssertionError") == 0) {
+                id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+                // TODO how much of a hack is this?
+            } else if (strcmp(qstr_str(qstr), "super") == 0 && scope->kind == SCOPE_FUNCTION) {
+                // special case, super is a global, and also counts as use of __class__
+                id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
+                id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr___class__);
+                if (id_info2 != NULL) {
+                    id_info2 = scope_find_or_add_id(scope, qstr___class__, &added);
+                    if (added) { // __class__ becomes a free variable only if this scope did not already have it
+                        id_info2->kind = ID_INFO_KIND_FREE;
+                        scope_close_over_in_parents(scope, qstr___class__);
+                    }
+                }
+            } else {
+                id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr);
+                if (id_info2 != NULL && (id_info2->kind == ID_INFO_KIND_LOCAL || id_info2->kind == ID_INFO_KIND_CELL || id_info2->kind == ID_INFO_KIND_FREE)) {
+                    id_info->kind = ID_INFO_KIND_FREE; // found by scope_find_local_in_parent as a local, cell or free id
+                    scope_close_over_in_parents(scope, qstr);
+                } else {
+                    id_info->kind = ID_INFO_KIND_GLOBAL_IMPLICIT;
+                }
+            }
+        }
+    } else {
+        id_info = scope_find(scope, qstr); // later passes only look up what PASS_1 recorded
+    }
+
+    assert(id_info != NULL); // TODO can this ever fail?
+
+    // call the emit backend with the correct code
+    if (id_info == NULL || id_info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { // the NULL case is only reachable with asserts disabled
+        EMIT(load_name, qstr);
+    } else if (id_info->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) {
+        EMIT(load_global, qstr);
+    } else if (id_info->kind == ID_INFO_KIND_LOCAL) {
+        EMIT(load_fast, qstr, id_info->local_num);
+    } else if (id_info->kind == ID_INFO_KIND_CELL || id_info->kind == ID_INFO_KIND_FREE) {
+        EMIT(load_deref, qstr);
+    } else {
+        assert(0);
+    }
+}
+
+static id_info_t *get_id_for_modification(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) { // emit is not used in this function
+    id_info_t *id_info = NULL;
+    if (pass == PASS_1) {
+        // name adding/lookup
+        bool added;
+        id_info = scope_find_or_add_id(scope, qstr, &added);
+        if (added) {
+            if (scope->kind == SCOPE_MODULE || scope->kind == SCOPE_CLASS) {
+                id_info->kind = ID_INFO_KIND_GLOBAL_IMPLICIT; // the store/delete callers emit the *_name opcodes for this kind
+            } else {
+                id_info->kind = ID_INFO_KIND_LOCAL;
+            }
+        } else if (scope->kind >= SCOPE_FUNCTION && scope->kind <= SCOPE_GEN_EXPR && id_info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) {
+            // rebind as a local variable
+            id_info->kind = ID_INFO_KIND_LOCAL;
+        }
+    } else {
+        id_info = scope_find(scope, qstr); // later passes only look up what PASS_1 recorded
+    }
+
+    assert(id_info != NULL); // TODO can this ever fail?
+
+    return id_info;
+}
+
+void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) {
+    // Emits the store that matches the kind of the id; the id info is created
+    // in PASS_1 and looked up in the later passes.
+    id_info_t *info = get_id_for_modification(pass, scope, emit, qstr);
+    if (info == NULL) {
+        EMIT(store_name, qstr); // only reachable when asserts are compiled out
+        return;
+    }
+    switch (info->kind) {
+        case ID_INFO_KIND_GLOBAL_IMPLICIT: EMIT(store_name, qstr); break;
+        case ID_INFO_KIND_GLOBAL_EXPLICIT: EMIT(store_global, qstr); break;
+        case ID_INFO_KIND_LOCAL: EMIT(store_fast, qstr, info->local_num); break;
+        case ID_INFO_KIND_CELL: // cell and free ids share the deref form
+        case ID_INFO_KIND_FREE: EMIT(store_deref, qstr); break;
+        default: assert(0);
+    }
+}
+
+void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) {
+    // Emits the delete that matches the kind of the id; the id info is created
+    // in PASS_1 and looked up in the later passes.
+    id_info_t *info = get_id_for_modification(pass, scope, emit, qstr);
+    if (info == NULL) {
+        EMIT(delete_name, qstr); // only reachable when asserts are compiled out
+        return;
+    }
+    switch (info->kind) {
+        case ID_INFO_KIND_GLOBAL_IMPLICIT: EMIT(delete_name, qstr); break;
+        case ID_INFO_KIND_GLOBAL_EXPLICIT: EMIT(delete_global, qstr); break;
+        case ID_INFO_KIND_LOCAL: EMIT(delete_fast, qstr, info->local_num); break;
+        case ID_INFO_KIND_CELL: // cell and free ids share the deref form
+        case ID_INFO_KIND_FREE: EMIT(delete_deref, qstr); break;
+        default: assert(0);
+    }
+}
diff --git a/py/emitcpy.c b/py/emitcpy.c
new file mode 100644
index 0000000..637abd7
--- /dev/null
+++ b/py/emitcpy.c
@@ -0,0 +1,834 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "compile.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+
+#ifdef EMIT_DO_CPY
+
+struct _emitter_t { // state of the CPython-style listing emitter
+    int pass; // pass currently running, set by emit_start_pass
+    int next_label; // next label number handed out by emit_label_new; restarts at 1 each pass
+    int byte_code_offset; // running sum of the byte_code_size values given to emit_pre
+    int stack_size; // simulated stack depth, updated by every emit_pre call
+    bool last_emit_was_return_value; // set by emit_return_value, cleared by any other emit_pre
+
+    scope_t *scope; // scope being emitted; its stack_size receives the maximum depth seen
+
+    int max_num_labels; // largest next_label observed at the end of a PASS_1
+    int *label_offsets; // byte code offset of each label, indexed by label number; filled in PASS_2
+};
+
+emitter_t *emit_new() { // allocate an emitter; the per-pass fields are set by emit_start_pass
+    emitter_t *e = m_new(emitter_t, 1);
+    e->label_offsets = NULL; // allocated by emit_start_pass on the first pass after PASS_1
+    e->max_num_labels = 0;
+    return e;
+}
+
+// Reset the per-pass state for `scope`; on passes after PASS_1 make sure the label table exists.
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
+    emit->scope = scope;
+    emit->pass = pass;
+    emit->next_label = 1;
+    emit->stack_size = 0;
+    emit->byte_code_offset = 0;
+    emit->last_emit_was_return_value = false;
+    if (pass <= PASS_1) { return; }
+    if (emit->label_offsets == NULL) {
+        emit->label_offsets = m_new(int, emit->max_num_labels); // sized by the count recorded in emit_end_pass
+    }
+    if (pass == PASS_2) { // mark every label as unassigned (-1) before PASS_2 fills the table
+        memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(int));
+    }
+}
+
+// Finish a pass: report an unbalanced simulated stack and, in PASS_1, record the label count.
+void emit_end_pass(emitter_t *emit) {
+    if (emit->stack_size != 0) {
+        // the stack deltas of all emitted opcodes should have cancelled out by now
+        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
+    }
+
+    if (emit->pass != PASS_1) { return; }
+    // keep the largest label count seen so far; emit_start_pass sizes label_offsets from it
+    if (emit->max_num_labels < emit->next_label) {
+        emit->max_num_labels = emit->next_label;
+    }
+}
+
+bool emit_last_emit_was_return_value(emitter_t *emit) { // true when the most recent opcode emitted was RETURN_VALUE
+    return emit->last_emit_was_return_value;
+}
+
+int emit_get_stack_size(emitter_t *emit) { // current simulated stack depth
+    return emit->stack_size;
+}
+
+void emit_set_stack_size(emitter_t *emit, int size) { // overwrite the simulated stack depth; the scope maximum is not updated here
+    emit->stack_size = size;
+}
+
+// Bookkeeping run before every opcode: apply the stack delta, raise the scope's recorded
+// maximum stack depth (passes after PASS_1), clear the return-value flag and, in PASS_3,
+// print the offset column for opcodes that occupy bytes; finally advance the offset.
+static void emit_pre(emitter_t *emit, int stack_size_delta, int byte_code_size) {
+    emit->last_emit_was_return_value = false;
+    emit->stack_size += stack_size_delta;
+    if (emit->pass > PASS_1 && emit->scope->stack_size < emit->stack_size) {
+        emit->scope->stack_size = emit->stack_size;
+    }
+    if (byte_code_size > 0 && emit->pass == PASS_3) {
+        int at = emit->byte_code_offset;
+        printf(at >= 1000 ? "%d " : "% 4d ", at); // offsets of 1000 and up skip the padded field
+    }
+    emit->byte_code_offset += byte_code_size;
+}
+
+int emit_label_new(emitter_t *emit) { // hand out the next label number (numbering restarts at 1 each pass)
+    return emit->next_label++;
+}
+
+// Bind label `l` to the current byte code offset (PASS_2) and check it is unchanged (PASS_3).
+void emit_label_assign(emitter_t *emit, int l) {
+    emit_pre(emit, 0, 0);
+    if (emit->pass <= PASS_1) { return; }
+    assert(l < emit->max_num_labels);
+    int *slot = &emit->label_offsets[l];
+    if (emit->pass == PASS_2) {
+        // a label is assigned once: the slot must still hold the -1 written by emit_start_pass
+        assert(*slot == -1);
+        *slot = emit->byte_code_offset;
+    } else if (emit->pass == PASS_3) {
+        // the layout must be identical in PASS_2 and PASS_3
+        assert(*slot == emit->byte_code_offset);
+    }
+}
+
+// IMPORT_NAME: stack delta -1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_import_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("IMPORT_NAME %s\n", qstr_str(qstr));
+}
+
+// IMPORT_FROM: stack delta +1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_import_from(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("IMPORT_FROM %s\n", qstr_str(qstr));
+}
+
+// IMPORT_STAR: stack delta -1, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_import_star(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("IMPORT_STAR\n");
+}
+
+// LOAD_CONST of a keyword constant (False/None/True): stack delta +1, 3 bytes; printed in PASS_3.
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_CONST ");
+    switch (tok) {
+        case PY_TOKEN_KW_FALSE: printf("False"); break;
+        case PY_TOKEN_KW_NONE: printf("None"); break;
+        case PY_TOKEN_KW_TRUE: printf("True"); break;
+        default: printf("?=%d\n", (int)tok); return; // unexpected token: report it (the assert that followed `return` was unreachable)
+    }
+    printf("\n");
+}
+
+// LOAD_CONST of a small integer: stack delta +1, 3 bytes; the value is printed in PASS_3 only.
+void emit_load_const_small_int(emitter_t *emit, int arg) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_CONST %d\n", arg);
+}
+
+// LOAD_CONST of an integer given as text: stack delta +1, 3 bytes; printed verbatim in PASS_3.
+void emit_load_const_int(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_CONST %s\n", qstr_str(qstr));
+}
+
+// LOAD_CONST of a decimal given as text: stack delta +1, 3 bytes; printed verbatim in PASS_3.
+void emit_load_const_dec(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_CONST %s\n", qstr_str(qstr));
+}
+
+// LOAD_CONST of an identifier: stack delta +1, 3 bytes; printed in single quotes in PASS_3.
+void emit_load_const_id(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_CONST '%s'\n", qstr_str(qstr));
+}
+
+// LOAD_CONST of a str/bytes literal: stack delta +1, 3 bytes; PASS_3 prints it as a quoted literal.
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_CONST ");
+    emit_load_const_verbatim_quoted_str(emit, qstr, bytes);
+    printf("\n");
+}
+
+// Open a verbatim LOAD_CONST line: stack delta +1, 3 bytes; PASS_3 prints the mnemonic only.
+void emit_load_const_verbatim_start(emitter_t *emit) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_CONST ");
+}
+
+// Append a decimal integer to the verbatim constant being printed (PASS_3 only, no bookkeeping).
+void emit_load_const_verbatim_int(emitter_t *emit, int val) {
+    if (emit->pass != PASS_3) { return; }
+    printf("%d", val);
+}
+
+// Append a NUL-terminated string to the verbatim constant being printed (PASS_3 only).
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
+    if (emit->pass != PASS_3) { return; }
+    printf("%s", str);
+}
+
+// Append at most `len` characters of `str` to the verbatim constant being printed (PASS_3 only).
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
+    if (emit->pass != PASS_3) { return; }
+    printf("%.*s", len, str);
+}
+
+// Print `qstr` as a quoted Python literal (PASS_3 only), prefixed with "b" when `bytes` is set.
+// Double quotes delimit the text only when it contains a single quote and no double quote;
+// otherwise single quotes are used and single quotes inside the text are backslash-escaped.
+// Newlines are printed as \n; nothing else is escaped here.
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
+    // TODO strings should be escaped before we get here
+    if (emit->pass != PASS_3) { return; }
+
+    const char *str = qstr_str(qstr);
+    int len = strlen(str);
+
+    // survey the text to choose the delimiter
+    bool has_single_quote = false;
+    bool has_double_quote = false;
+    for (int i = 0; i < len; i++) {
+        switch (str[i]) {
+            case '\'': has_single_quote = true; break;
+            case '"': has_double_quote = true; break;
+            default: break;
+        }
+    }
+    const bool quote_single = !has_single_quote || has_double_quote;
+
+    if (bytes) {
+        printf("b");
+    }
+    // opening delimiter
+    printf(quote_single ? "'" : "\"");
+    for (int i = 0; i < len; i++) {
+        const char c = str[i];
+        if (c == '\n') {
+            printf("\\n");
+        } else if (c == '\\' && str[i + 1] == '\'') {
+            // an already-escaped single quote: consume both characters and keep the
+            // backslash only under single-quote delimiters (for the last character the
+            // look-ahead reads the terminating NUL, so it stays inside the string)
+            i += 1;
+            printf(quote_single ? "\\'" : "'");
+        } else if (c == '\'' && quote_single) {
+            printf("\\'");
+        } else {
+            printf("%c", c);
+        }
+    }
+    // closing delimiter
+    printf(quote_single ? "'" : "\"");
+}
+
+// Terminate the verbatim LOAD_CONST line with a newline (PASS_3 only).
+void emit_load_const_verbatim_end(emitter_t *emit) {
+    if (emit->pass != PASS_3) { return; }
+    printf("\n");
+}
+
+// LOAD_NAME: stack delta +1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_load_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_NAME %s\n", qstr_str(qstr));
+}
+
+// LOAD_GLOBAL: stack delta +1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_load_global(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_GLOBAL %s\n", qstr_str(qstr));
+}
+
+// LOAD_FAST: stack delta +1, 3 bytes; PASS_3 prints the name (`local_num` is not used here).
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_FAST %s\n", qstr_str(qstr));
+}
+
+// LOAD_DEREF: stack delta +1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_load_deref(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_DEREF %s\n", qstr_str(qstr));
+}
+
+// LOAD_CLOSURE: stack delta +1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_load_closure(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_CLOSURE %s\n", qstr_str(qstr));
+}
+
+// LOAD_ATTR: stack delta 0, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_load_attr(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_ATTR %s\n", qstr_str(qstr));
+}
+
+void emit_load_method(emitter_t *emit, qstr qstr) { // this emitter has no separate method load: it emits LOAD_ATTR
+    emit_load_attr(emit, qstr);
+}
+
+// LOAD_BUILD_CLASS: stack delta +1, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_load_build_class(emitter_t *emit) {
+    emit_pre(emit, 1, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_BUILD_CLASS\n");
+}
+
+// STORE_NAME: stack delta -1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_store_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("STORE_NAME %s\n", qstr_str(qstr));
+}
+
+// STORE_GLOBAL: stack delta -1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_store_global(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("STORE_GLOBAL %s\n", qstr_str(qstr));
+}
+
+// STORE_FAST: stack delta -1, 3 bytes; PASS_3 prints the name (`local_num` is not used here).
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("STORE_FAST %s\n", qstr_str(qstr));
+}
+
+// STORE_DEREF: stack delta -1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_store_deref(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("STORE_DEREF %s\n", qstr_str(qstr));
+}
+
+// STORE_ATTR: stack delta -2, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_store_attr(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -2, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("STORE_ATTR %s\n", qstr_str(qstr));
+}
+
+// STORE_LOCALS: stack delta -1, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_store_locals(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("STORE_LOCALS\n");
+}
+
+// STORE_SUBSCR: stack delta -3, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_store_subscr(emitter_t *emit) {
+    emit_pre(emit, -3, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("STORE_SUBSCR\n");
+}
+
+// DELETE_NAME: stack delta 0, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_delete_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("DELETE_NAME %s\n", qstr_str(qstr));
+}
+
+// DELETE_GLOBAL: stack delta 0, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_delete_global(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("DELETE_GLOBAL %s\n", qstr_str(qstr));
+}
+
+// DELETE_FAST: stack delta 0, 3 bytes; PASS_3 prints the name (`local_num` is not used here).
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("DELETE_FAST %s\n", qstr_str(qstr));
+}
+
+// DELETE_DEREF: stack delta 0, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_delete_deref(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("DELETE_DEREF %s\n", qstr_str(qstr));
+}
+
+// DELETE_ATTR: stack delta -1, 3 bytes of byte code; the listing line is printed in PASS_3 only.
+void emit_delete_attr(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("DELETE_ATTR %s\n", qstr_str(qstr));
+}
+
+// DELETE_SUBSCR: stack delta -2, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_delete_subscr(emitter_t *emit) {
+    emit_pre(emit, -2, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("DELETE_SUBSCR\n");
+}
+
+// DUP_TOP: stack delta +1, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_dup_top(emitter_t *emit) {
+    emit_pre(emit, 1, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("DUP_TOP\n");
+}
+
+// DUP_TOP_TWO: stack delta +2, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_dup_top_two(emitter_t *emit) {
+    emit_pre(emit, 2, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("DUP_TOP_TWO\n");
+}
+
+// POP_TOP: stack delta -1, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_pop_top(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("POP_TOP\n");
+}
+
+// ROT_TWO: stack delta 0, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_rot_two(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("ROT_TWO\n");
+}
+
+// ROT_THREE: stack delta 0, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_rot_three(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("ROT_THREE\n");
+}
+
+// Unconditional jump: stack delta 0, 3 bytes. PASS_3 prints the label's offset for either mnemonic.
+void emit_jump(emitter_t *emit, int label) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) { return; }
+    int dest = emit->label_offsets[label]; // compared with the offset just past this instruction
+    if (dest < emit->byte_code_offset) {
+        printf("JUMP_ABSOLUTE %d\n", dest);
+    } else {
+        printf("JUMP_FORWARD %d\n", dest);
+    }
+}
+
+// POP_JUMP_IF_TRUE: stack delta -1, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_pop_jump_if_true(emitter_t *emit, int label) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("POP_JUMP_IF_TRUE %d\n", emit->label_offsets[label]);
+}
+
+// POP_JUMP_IF_FALSE: stack delta -1, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_pop_jump_if_false(emitter_t *emit, int label) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("POP_JUMP_IF_FALSE %d\n", emit->label_offsets[label]);
+}
+
+// JUMP_IF_TRUE_OR_POP: stack delta -1, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_jump_if_true_or_pop(emitter_t *emit, int label) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("JUMP_IF_TRUE_OR_POP %d\n", emit->label_offsets[label]);
+}
+
+// JUMP_IF_FALSE_OR_POP: stack delta -1, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_jump_if_false_or_pop(emitter_t *emit, int label) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("JUMP_IF_FALSE_OR_POP %d\n", emit->label_offsets[label]);
+}
+
+// SETUP_LOOP: stack delta 0, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_setup_loop(emitter_t *emit, int label) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("SETUP_LOOP %d\n", emit->label_offsets[label]);
+}
+
+// BREAK_LOOP: stack delta 0, 1 byte. `label` is accepted but unused: the line printed in
+// PASS_3 carries no operand.
+void emit_break_loop(emitter_t *emit, int label) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("BREAK_LOOP\n"); // CPython doesn't have label
+}
+
+// CONTINUE_LOOP: stack delta 0, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_continue_loop(emitter_t *emit, int label) {
+    emit_pre(emit, 0, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("CONTINUE_LOOP %d\n", emit->label_offsets[label]);
+}
+
+// SETUP_WITH: stack delta +7, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_setup_with(emitter_t *emit, int label) {
+    emit_pre(emit, 7, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("SETUP_WITH %d\n", emit->label_offsets[label]);
+}
+
+// WITH_CLEANUP: stack delta -7 (undoing SETUP_WITH's +7), 1 byte; printed in PASS_3 only.
+void emit_with_cleanup(emitter_t *emit) {
+    emit_pre(emit, -7, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("WITH_CLEANUP\n");
+}
+
+// SETUP_EXCEPT: stack delta +6, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_setup_except(emitter_t *emit, int label) {
+    emit_pre(emit, 6, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("SETUP_EXCEPT %d\n", emit->label_offsets[label]);
+}
+
+// SETUP_FINALLY: stack delta +6, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_setup_finally(emitter_t *emit, int label) {
+    emit_pre(emit, 6, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("SETUP_FINALLY %d\n", emit->label_offsets[label]);
+}
+
+// END_FINALLY: stack delta -1, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_end_finally(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("END_FINALLY\n");
+}
+
+// GET_ITER: stack delta 0, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_get_iter(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("GET_ITER\n");
+}
+
+// FOR_ITER: stack delta +1, 3 bytes; PASS_3 prints the label's byte code offset.
+void emit_for_iter(emitter_t *emit, int label) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("FOR_ITER %d\n", emit->label_offsets[label]);
+}
+
+void emit_for_iter_end(emitter_t *emit) { // bookkeeping only: stack delta -1, no byte code and nothing printed
+    emit_pre(emit, -1, 0);
+}
+
+// POP_BLOCK: stack delta 0, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_pop_block(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("POP_BLOCK\n");
+}
+
+// POP_EXCEPT: stack delta 0, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_pop_except(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("POP_EXCEPT\n");
+}
+
+// Unary operator: stack delta 0, 1 byte; PASS_3 prints the mnemonic selected by `op`.
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass != PASS_3) { return; }
+    switch (op) {
+        case RT_UNARY_OP_NOT: printf("UNARY_NOT\n"); break;
+        case RT_UNARY_OP_POSITIVE: printf("UNARY_POSITIVE\n"); break;
+        case RT_UNARY_OP_NEGATIVE: printf("UNARY_NEGATIVE\n"); break;
+        case RT_UNARY_OP_INVERT: printf("UNARY_INVERT\n"); break;
+        default: assert(0);
+    }
+}
+
+// Binary or in-place operator: stack delta -1, 1 byte; PASS_3 prints the mnemonic for `op`.
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass != PASS_3) { return; }
+    switch (op) {
+        case RT_BINARY_OP_SUBSCR: printf("BINARY_SUBSCR\n"); break;
+        case RT_BINARY_OP_OR: printf("BINARY_OR\n"); break;
+        case RT_BINARY_OP_XOR: printf("BINARY_XOR\n"); break;
+        case RT_BINARY_OP_AND: printf("BINARY_AND\n"); break;
+        case RT_BINARY_OP_LSHIFT: printf("BINARY_LSHIFT\n"); break;
+        case RT_BINARY_OP_RSHIFT: printf("BINARY_RSHIFT\n"); break;
+        case RT_BINARY_OP_ADD: printf("BINARY_ADD\n"); break;
+        case RT_BINARY_OP_SUBTRACT: printf("BINARY_SUBTRACT\n"); break;
+        case RT_BINARY_OP_MULTIPLY: printf("BINARY_MULTIPLY\n"); break;
+        case RT_BINARY_OP_FLOOR_DIVIDE: printf("BINARY_FLOOR_DIVIDE\n"); break;
+        case RT_BINARY_OP_TRUE_DIVIDE: printf("BINARY_TRUE_DIVIDE\n"); break;
+        case RT_BINARY_OP_MODULO: printf("BINARY_MODULO\n"); break;
+        case RT_BINARY_OP_POWER: printf("BINARY_POWER\n"); break;
+        case RT_BINARY_OP_INPLACE_OR: printf("INPLACE_OR\n"); break;
+        case RT_BINARY_OP_INPLACE_XOR: printf("INPLACE_XOR\n"); break;
+        case RT_BINARY_OP_INPLACE_AND: printf("INPLACE_AND\n"); break;
+        case RT_BINARY_OP_INPLACE_LSHIFT: printf("INPLACE_LSHIFT\n"); break;
+        case RT_BINARY_OP_INPLACE_RSHIFT: printf("INPLACE_RSHIFT\n"); break;
+        case RT_BINARY_OP_INPLACE_ADD: printf("INPLACE_ADD\n"); break;
+        case RT_BINARY_OP_INPLACE_SUBTRACT: printf("INPLACE_SUBTRACT\n"); break;
+        case RT_BINARY_OP_INPLACE_MULTIPLY: printf("INPLACE_MULTIPLY\n"); break;
+        case RT_BINARY_OP_INPLACE_FLOOR_DIVIDE: printf("INPLACE_FLOOR_DIVIDE\n"); break;
+        case RT_BINARY_OP_INPLACE_TRUE_DIVIDE: printf("INPLACE_TRUE_DIVIDE\n"); break;
+        case RT_BINARY_OP_INPLACE_MODULO: printf("INPLACE_MODULO\n"); break;
+        case RT_BINARY_OP_INPLACE_POWER: printf("INPLACE_POWER\n"); break;
+        default: assert(0);
+    }
+}
+
+// COMPARE_OP: stack delta -1, 3 bytes; PASS_3 prints the comparison selected by `op`.
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    switch (op) {
+        case RT_COMPARE_OP_LESS: printf("COMPARE_OP <\n"); break;
+        case RT_COMPARE_OP_MORE: printf("COMPARE_OP >\n"); break;
+        case RT_COMPARE_OP_EQUAL: printf("COMPARE_OP ==\n"); break;
+        case RT_COMPARE_OP_LESS_EQUAL: printf("COMPARE_OP <=\n"); break;
+        case RT_COMPARE_OP_MORE_EQUAL: printf("COMPARE_OP >=\n"); break;
+        case RT_COMPARE_OP_NOT_EQUAL: printf("COMPARE_OP !=\n"); break;
+        case RT_COMPARE_OP_IN: printf("COMPARE_OP in\n"); break;
+        case RT_COMPARE_OP_NOT_IN: printf("COMPARE_OP not in\n"); break;
+        case RT_COMPARE_OP_IS: printf("COMPARE_OP is\n"); break;
+        case RT_COMPARE_OP_IS_NOT: printf("COMPARE_OP is not\n"); break;
+        case RT_COMPARE_OP_EXCEPTION_MATCH: printf("COMPARE_OP exception match\n"); break;
+        default: assert(0);
+    }
+}
+
+// BUILD_TUPLE: stack delta 1 - n_args, 3 bytes; PASS_3 prints the element count.
+void emit_build_tuple(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1 - n_args, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("BUILD_TUPLE %d\n", n_args);
+}
+
+// BUILD_LIST: stack delta 1 - n_args, 3 bytes; PASS_3 prints the element count.
+void emit_build_list(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1 - n_args, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("BUILD_LIST %d\n", n_args);
+}
+
+// LIST_APPEND: stack delta -1, 3 bytes; PASS_3 prints `list_index` as the operand.
+void emit_list_append(emitter_t *emit, int list_index) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LIST_APPEND %d\n", list_index);
+}
+
+// BUILD_MAP: stack delta +1 regardless of n_args, 3 bytes; PASS_3 prints n_args as the operand.
+void emit_build_map(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("BUILD_MAP %d\n", n_args);
+}
+
+// STORE_MAP: stack delta -2, 1 byte of byte code; the listing line is printed in PASS_3 only.
+void emit_store_map(emitter_t *emit) {
+    emit_pre(emit, -2, 1);
+    if (emit->pass != PASS_3) { return; }
+    printf("STORE_MAP\n");
+}
+
+// MAP_ADD: stack delta -2, 3 bytes; PASS_3 prints `map_index` as the operand.
+void emit_map_add(emitter_t *emit, int map_index) {
+    emit_pre(emit, -2, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("MAP_ADD %d\n", map_index);
+}
+
+// BUILD_SET: stack delta 1 - n_args, 3 bytes; PASS_3 prints the element count.
+void emit_build_set(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1 - n_args, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("BUILD_SET %d\n", n_args);
+}
+
+// SET_ADD: stack delta -1, 3 bytes; PASS_3 prints `set_index` as the operand.
+void emit_set_add(emitter_t *emit, int set_index) {
+    emit_pre(emit, -1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("SET_ADD %d\n", set_index);
+}
+
+// BUILD_SLICE: stack delta 1 - n_args, 3 bytes; PASS_3 prints the argument count.
+void emit_build_slice(emitter_t *emit, int n_args) {
+    emit_pre(emit, 1 - n_args, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("BUILD_SLICE %d\n", n_args);
+}
+
+// UNPACK_SEQUENCE: stack delta n_args - 1, 3 bytes; PASS_3 prints the element count.
+void emit_unpack_sequence(emitter_t *emit, int n_args) {
+    emit_pre(emit, -1 + n_args, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("UNPACK_SEQUENCE %d\n", n_args);
+}
+
+// UNPACK_EX: stack delta -1 + n_left + n_right + 1, 3 bytes; operand is n_left | (n_right << 8).
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) {
+    emit_pre(emit, -1 + n_left + n_right + 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("UNPACK_EX %d\n", n_left | (n_right << 8));
+}
+
+// CALL_FUNCTION family: 3 bytes. The stack delta is -(n_positional + 2 * n_keyword) minus
+// one for each of the star / double-star arguments that is present. PASS_3 prints the
+// mnemonic variant (_VAR, _KW or _VAR_KW) followed by the positional and keyword counts.
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    // one extra stack slot per star / double-star argument
+    int n_extra = 0;
+    if (have_star_arg) {
+        n_extra += 1;
+    }
+    if (have_dbl_star_arg) {
+        n_extra += 1;
+    }
+    emit_pre(emit, -n_positional - 2 * n_keyword - n_extra, 3);
+    if (emit->pass != PASS_3) { return; }
+    // the mnemonic suffix reflects which of the two extra arguments are present
+    if (have_star_arg && have_dbl_star_arg) {
+        printf("CALL_FUNCTION_VAR_KW");
+    } else if (have_star_arg) {
+        printf("CALL_FUNCTION_VAR");
+    } else if (have_dbl_star_arg) {
+        printf("CALL_FUNCTION_KW");
+    } else {
+        printf("CALL_FUNCTION");
+    }
+    printf(" %d, %d\n", n_positional, n_keyword);
+}
+
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { // method calls are emitted exactly like function calls here
+    emit_call_function(emit, n_positional, n_keyword, have_star_arg, have_dbl_star_arg);
+}
+
+// RETURN_VALUE: stack delta -1, 1 byte. Sets the return-value flag after emit_pre has cleared it.
+void emit_return_value(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    emit->last_emit_was_return_value = true;
+    if (emit->pass != PASS_3) { return; }
+    printf("RETURN_VALUE\n");
+}
+
+// RAISE_VARARGS: stack delta -n_args, 3 bytes; PASS_3 prints the argument count.
+void emit_raise_varargs(emitter_t *emit, int n_args) {
+    emit_pre(emit, -n_args, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("RAISE_VARARGS %d\n", n_args);
+}
+
+// Emit the two LOAD_CONSTs used by emit_make_function / emit_make_closure: the code object
+// and its qualified name. Each is 3 bytes with stack delta +1; text is printed in PASS_3 only.
+void load_const_code_and_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit, 1, 3);
+    if (emit->pass == PASS_3) {
+        printf("LOAD_CONST code %s\n", qstr_str(qstr));
+    }
+    // second constant: the qualified name
+    emit_pre(emit, 1, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("LOAD_CONST '");
+    int depth = 0; // number of ancestors of the scope being emitted
+    for (scope_t *up = emit->scope; up->parent != NULL; up = up->parent) {
+        depth++;
+    }
+    // visit the chain outermost-first (ending with the current scope itself), printing a
+    // prefix for every function or class scope on the way; other scope kinds add nothing
+    for (int level = depth; level >= 0; level--) {
+        scope_t *cur = emit->scope;
+        for (int hops = level; hops > 0; hops--) {
+            cur = cur->parent;
+        }
+        if (cur->kind == SCOPE_FUNCTION) {
+            printf("%s.<locals>.", qstr_str(cur->simple_name));
+        } else if (cur->kind == SCOPE_CLASS) {
+            printf("%s.", qstr_str(cur->simple_name));
+        }
+    }
+    printf("%s'\n", qstr_str(qstr));
+}
+
+// MAKE_FUNCTION: loads code + name first, then 3 bytes; operand is (n_dict_params << 8) | n_default_params.
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+    load_const_code_and_name(emit, scope->simple_name);
+    emit_pre(emit, -1 - n_default_params - 2 * n_dict_params, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("MAKE_FUNCTION %d\n", (n_dict_params << 8) | n_default_params);
+}
+
+// MAKE_CLOSURE: loads code + name first, then 3 bytes; operand is (n_dict_params << 8) | n_default_params.
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+    load_const_code_and_name(emit, scope->simple_name);
+    emit_pre(emit, -2 - n_default_params - 2 * n_dict_params, 3);
+    if (emit->pass != PASS_3) { return; }
+    printf("MAKE_CLOSURE %d\n", (n_dict_params << 8) | n_default_params);
+}
+
+// YIELD_VALUE: stack delta 0, 1 byte. PASS_2 flags the scope as a generator; PASS_3 prints.
+void emit_yield_value(emitter_t *emit) {
+    emit_pre(emit, 0, 1);
+    if (emit->pass == PASS_2) {
+        emit->scope->flags |= SCOPE_FLAG_GENERATOR;
+    } else if (emit->pass == PASS_3) {
+        printf("YIELD_VALUE\n");
+    }
+}
+
+// YIELD_FROM: stack delta -1, 1 byte. PASS_2 flags the scope as a generator; PASS_3 prints.
+void emit_yield_from(emitter_t *emit) {
+    emit_pre(emit, -1, 1);
+    if (emit->pass == PASS_2) {
+        emit->scope->flags |= SCOPE_FLAG_GENERATOR;
+    } else if (emit->pass == PASS_3) {
+        printf("YIELD_FROM\n");
+    }
+}
+
+#endif // EMIT_DO_CPY
diff --git a/py/emitthumb.c b/py/emitthumb.c
new file mode 100644
index 0000000..cad6b65
--- /dev/null
+++ b/py/emitthumb.c
@@ -0,0 +1,673 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+#include "asmthumb.h"
+
+#ifdef EMIT_DO_THUMB
+
+#define REG_LOCAL_1 (REG_R4) // register that receives the first parameter in emit_start_pass
+#define REG_LOCAL_2 (REG_R5) // register that receives the second parameter
+#define REG_LOCAL_3 (REG_R6) // register that receives the third parameter
+#define REG_TEMP    (REG_R7) // NOTE(review): emit_start_pass loads rt_fun_table's address into R7 directly — confirm intended use
+#define REG_LOCAL_NUM (3) // locals beyond this many are kept in stack slots (see emit_start_pass)
+
+typedef enum { // kind of push that has been deferred until the next opcode needs the stack
+    NEED_TO_PUSH_NOTHING, // no push pending
+    NEED_TO_PUSH_REG, // the value in emitter last_reg still has to be pushed
+    NEED_TO_PUSH_I32, // the immediate in emitter last_i32 still has to be pushed
+} need_to_push_t;
+
+struct _emitter_t { // state of the Thumb native-code emitter
+    int pass; // pass currently running, set by emit_start_pass
+    int stack_start; // index of the first value-stack slot; the spilled locals come before it
+    int stack_size; // number of values currently stored in stack slots (a deferred push is not counted)
+    bool last_emit_was_return_value; // cleared by emit_pre_raw; emit_end_pass skips the exit sequence when set
+    need_to_push_t need_to_push; // which deferred push, if any, is pending
+    int last_reg; // register holding the pending value when need_to_push is NEED_TO_PUSH_REG
+    int32_t last_i32; // pending immediate when need_to_push is NEED_TO_PUSH_I32
+
+    scope_t *scope; // scope being compiled
+
+    asm_thumb_t *as; // assembler that receives the generated instructions
+    bool do_native_types; // when true, small ints are pushed untagged (see emit_load_const_small_int)
+};
+
+emitter_t *emit_new() { // allocate an emitter together with its Thumb assembler
+    emitter_t *e = m_new(emitter_t, 1);
+    e->do_native_types = true;
+    e->as = asm_thumb_new();
+    return e;
+}
+
+// Begin a pass over `scope`: reset the emitter state, start the assembler pass, emit the
+// function entry, copy the incoming arguments into their local homes and load the address
+// of rt_fun_table into R7.
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
+    emit->scope = scope;
+    emit->pass = pass;
+    emit->stack_size = 0;
+    emit->stack_start = 0;
+    emit->need_to_push = NEED_TO_PUSH_NOTHING;
+    emit->last_emit_was_return_value = false;
+    if (pass == PASS_1) {
+        scope->unique_code_id = rt_get_new_unique_code_id();
+    }
+
+    asm_thumb_start_pass(emit->as, pass);
+
+    // entry to function: locals beyond the first REG_LOCAL_NUM are spilled to slots, and the
+    // value stack's slots are added on top; PASS_1 requests no slots at all
+    int frame_slots = 0;
+    if (pass > PASS_1) {
+        int spilled_locals = scope->num_locals - REG_LOCAL_NUM;
+        if (spilled_locals < 0) {
+            spilled_locals = 0;
+        }
+        emit->stack_start = spilled_locals; // the value stack begins after the spilled locals
+        frame_slots = spilled_locals + scope->stack_size;
+    }
+    asm_thumb_entry(emit->as, frame_slots);
+
+    // initialise locals from parameters
+    for (int i = 0; i < scope->num_params; i++) {
+        switch (i) {
+            case 0: asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_1, REG_ARG_1); break;
+            case 1: asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_2, REG_ARG_2); break;
+            case 2: asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_3, REG_ARG_3); break;
+            case 3: asm_thumb_mov_local_reg(emit->as, i - REG_LOCAL_NUM, REG_ARG_4); break;
+            default: assert(0); // TODO not implemented
+        }
+    }
+
+    // R7 is loaded with the address of the runtime function table
+    asm_thumb_mov_reg_i32(emit->as, REG_R7, (machine_uint_t)rt_fun_table);
+}
+
+// Finish a pass: emit the exit sequence unless the last opcode was a return, close the
+// assembler pass, report an unbalanced stack and, in PASS_3, register the native code.
+void emit_end_pass(emitter_t *emit) {
+    if (!emit->last_emit_was_return_value) {
+        asm_thumb_exit(emit->as);
+    }
+    asm_thumb_end_pass(emit->as);
+
+    if (emit->stack_size != 0) {
+        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
+    }
+    if (emit->pass != PASS_3) { return; }
+
+    py_fun_t code = asm_thumb_get_code(emit->as);
+    rt_assign_native_code(emit->scope->unique_code_id, code, asm_thumb_get_code_size(emit->as), emit->scope->num_params);
+}
+
+bool emit_last_emit_was_return_value(emitter_t *emit) { // report the emitter's last_emit_was_return_value flag
+    return emit->last_emit_was_return_value;
+}
+
+int emit_get_stack_size(emitter_t *emit) { // stack depth as tracked by the emitter (a deferred push is not included)
+    return emit->stack_size;
+}
+
+void emit_set_stack_size(emitter_t *emit, int size) { // overwrite the tracked stack depth; no code is emitted
+    emit->stack_size = size;
+}
+
+// Apply a stack delta (the depth must stay >= 0) and, after PASS_1, track the scope's maximum depth.
+static void adjust_stack(emitter_t *emit, int stack_size_delta) {
+    const int depth = emit->stack_size + stack_size_delta;
+    emit->stack_size = depth;
+    assert(depth >= 0);
+    if (emit->pass > PASS_1 && emit->scope->stack_size < depth) { emit->scope->stack_size = depth; }
+}
+
+// Materialise a deferred push: store the pending register or immediate into the next stack
+// slot and grow the stack by one. With nothing pending, only the pending state is reset.
+static void stack_settle(emitter_t *emit) {
+    int src_reg;
+    if (emit->need_to_push == NEED_TO_PUSH_REG) {
+        src_reg = emit->last_reg;
+    } else if (emit->need_to_push == NEED_TO_PUSH_I32) {
+        // an immediate is loaded into R0 first and stored from there
+        src_reg = REG_R0;
+        asm_thumb_mov_reg_i32_optimised(emit->as, REG_R0, emit->last_i32);
+    } else {
+        emit->need_to_push = NEED_TO_PUSH_NOTHING;
+        return;
+    }
+    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, src_reg);
+    adjust_stack(emit, 1);
+    emit->need_to_push = NEED_TO_PUSH_NOTHING;
+}
+
+static void emit_pre_raw(emitter_t *emit, int stack_size_delta) { // per-opcode bookkeeping that leaves any deferred push untouched
+    adjust_stack(emit, stack_size_delta);
+    emit->last_emit_was_return_value = false; // any opcode started through here is not (yet) a return
+}
+
+static void emit_pre(emitter_t *emit) { // prologue for opcodes that pop nothing
+    stack_settle(emit); // write out a deferred push first so the stack slots are up to date
+    emit_pre_raw(emit, 0);
+}
+
+// Pop the top of the value stack into `reg_dest`. A deferred push is consumed directly
+// (register move or immediate load) without going through a stack slot; otherwise the
+// top slot is loaded and the stack shrinks by one.
+static void emit_pre_pop_reg(emitter_t *emit, int reg_dest) {
+    if (emit->need_to_push == NEED_TO_PUSH_NOTHING) {
+        asm_thumb_mov_reg_local(emit->as, reg_dest, emit->stack_start + emit->stack_size - 1);
+        emit_pre_raw(emit, -1);
+    } else if (emit->need_to_push == NEED_TO_PUSH_REG) {
+        emit_pre_raw(emit, 0);
+        if (reg_dest != emit->last_reg) {
+            // no move is needed when the value already sits in the destination register
+            asm_thumb_mov_reg_reg(emit->as, reg_dest, emit->last_reg);
+        }
+    } else if (emit->need_to_push == NEED_TO_PUSH_I32) {
+        emit_pre_raw(emit, 0);
+        asm_thumb_mov_reg_i32_optimised(emit->as, reg_dest, emit->last_i32);
+    }
+
+    // whatever was pending has now been consumed
+    emit->need_to_push = NEED_TO_PUSH_NOTHING;
+}
+
+static void emit_pre_pop_reg_reg(emitter_t *emit, int rega, int regb) { // pop two values: top -> rega, next -> regb
+    emit_pre_pop_reg(emit, rega); // consumes a deferred push or loads the top stack slot
+    asm_thumb_mov_reg_local(emit->as, regb, emit->stack_start + emit->stack_size - 1); // the slot that is now on top
+    adjust_stack(emit, -1);
+}
+
+static void emit_pre_pop_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) { // pop three values: top -> rega, then regb, then regc
+    emit_pre_pop_reg(emit, rega); // consumes a deferred push or loads the top stack slot
+    asm_thumb_mov_reg_local(emit->as, regb, emit->stack_start + emit->stack_size - 1); // slot now on top
+    asm_thumb_mov_reg_local(emit->as, regc, emit->stack_start + emit->stack_size - 2); // slot below it
+    adjust_stack(emit, -2); // both loads are accounted for together
+}
+
+static void emit_post(emitter_t *emit) { // epilogue for opcodes that push nothing; currently empty
+}
+
+static void emit_post_push_reg(emitter_t *emit, int reg) { // defer pushing the value held in `reg`; no code is emitted here
+    emit->need_to_push = NEED_TO_PUSH_REG;
+    emit->last_reg = reg;
+}
+
+static void emit_post_push_i32(emitter_t *emit, int32_t i32) { // defer pushing the immediate `i32`; no code is emitted here
+    emit->need_to_push = NEED_TO_PUSH_I32;
+    emit->last_i32 = i32;
+}
+
+static void emit_post_push_reg_reg(emitter_t *emit, int rega, int regb) { // push rega now, leave regb as a deferred push on top
+    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega); // rega goes to the next free slot
+    emit->need_to_push = NEED_TO_PUSH_REG;
+    emit->last_reg = regb;
+    adjust_stack(emit, 1); // counts rega only; regb is counted when the deferred push is settled
+}
+
+static void emit_post_push_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) { // push three registers straight to stack slots; nothing is deferred
+    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega);
+    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 1, regb);
+    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 2, regc); // regc ends up on top
+    adjust_stack(emit, 3);
+}
+
+static void emit_post_push_reg_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc, int regd) { // push four registers straight to stack slots; nothing is deferred
+    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega);
+    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 1, regb);
+    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 2, regc);
+    asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 3, regd); // regd ends up on top
+    adjust_stack(emit, 4);
+}
+
+static void emit_get_stack_pointer_to_reg_for_pop(emitter_t *emit, int reg_dest, int n_pop) { // reg_dest <- address of the current top slot, then drop n_pop values
+    asm_thumb_mov_reg_local_addr(emit->as, reg_dest, emit->stack_start + emit->stack_size - 1);
+    adjust_stack(emit, -n_pop);
+}
+
+static void emit_get_stack_pointer_to_reg_for_push(emitter_t *emit, int reg_dest, int n_push) {
+    asm_thumb_mov_reg_local_addr(emit->as, reg_dest, emit->stack_start + emit->stack_size + n_push - 1);
+    adjust_stack(emit, n_push);
+}
+
+static void emit_call(emitter_t *emit, rt_fun_kind_t fun_kind) {
+    asm_thumb_bl_ind(emit->as, rt_fun_table[fun_kind], fun_kind, REG_R3);
+}
+
+static void emit_call_with_i32_arg(emitter_t *emit, rt_fun_kind_t fun_kind, int32_t arg_val, int arg_reg) { // load arg_val into arg_reg, then make the runtime call
+    asm_thumb_mov_reg_i32_optimised(emit->as, arg_reg, arg_val);
+    emit_call(emit, fun_kind);
+}
+
+int emit_label_new(emitter_t *emit) {
+    return asm_thumb_label_new(emit->as);
+}
+
+void emit_label_assign(emitter_t *emit, int l) {
+    asm_thumb_label_assign(emit->as, l);
+}
+
+void emit_import_name(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_import_from(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_import_star(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) {
+    emit_pre(emit);
+    py_obj_t o;
+    switch (tok) {
+        case PY_TOKEN_KW_NONE: o = py_const_none; break;
+        case PY_TOKEN_KW_FALSE: o = py_const_false; break;
+        case PY_TOKEN_KW_TRUE: o = py_const_true; break;
+        default: assert(0); o = 0; // shouldn't happen
+    }
+    emit_post_push_i32(emit, (machine_uint_t)o);
+}
+
+void emit_load_const_small_int(emitter_t *emit, int arg) { // push the integer constant arg
+    emit_pre(emit);
+    if (emit->do_native_types) {
+        emit_post_push_i32(emit, arg); // native mode: the raw value
+    } else { // otherwise the tagged form (arg << 1) | 1, shifted as unsigned because left-shifting a negative int is undefined
+        emit_post_push_i32(emit, (int32_t)(((uint32_t)arg << 1) | 1u));
+    }
+}
+
+void emit_load_const_int(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_load_const_dec(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_load_const_id(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
+    emit_pre(emit);
+    emit_call_with_i32_arg(emit, RT_F_LOAD_CONST_STR, qstr, REG_ARG_1);
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_load_const_verbatim_start(emitter_t *emit) {
+    assert(0);
+}
+void emit_load_const_verbatim_int(emitter_t *emit, int val) {
+    assert(0);
+}
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
+    assert(0);
+}
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
+    assert(0);
+}
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
+    assert(0);
+}
+void emit_load_const_verbatim_end(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { // push local variable number local_num
+    emit_pre(emit);
+    int src_reg = REG_R0; // used when the local has to be fetched from memory
+    switch (local_num) {
+        case 0: src_reg = REG_LOCAL_1; break; // locals 0..2 are kept in registers
+        case 1: src_reg = REG_LOCAL_2; break;
+        case 2: src_reg = REG_LOCAL_3; break;
+        default: // NOTE(review): slot index is local_num - 1 although three locals live in registers -- confirm the frame layout
+            asm_thumb_mov_reg_local(emit->as, REG_R0, local_num - 1);
+            break;
+    }
+    emit_post_push_reg(emit, src_reg);
+}
+
+void emit_load_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit);
+    emit_call_with_i32_arg(emit, RT_F_LOAD_NAME, qstr, REG_ARG_1);
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_load_global(emitter_t *emit, qstr qstr) {
+    emit_pre(emit);
+    emit_call_with_i32_arg(emit, RT_F_LOAD_GLOBAL, qstr, REG_ARG_1);
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_load_deref(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_load_closure(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+
+void emit_load_attr(emitter_t *emit, qstr qstr) {
+    emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base
+    emit_call_with_i32_arg(emit, RT_F_LOAD_ATTR, qstr, REG_ARG_2); // arg2 = attribute name
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_load_method(emitter_t *emit, qstr qstr) {
+    emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base
+    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
+    emit_call_with_i32_arg(emit, RT_F_LOAD_METHOD, qstr, REG_ARG_2); // arg2 = method name
+}
+
+void emit_load_build_class(emitter_t *emit) {
+   assert(0);
+} // basically load __build_class__ from builtins
+
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
+    // pop the top of stack into local variable number local_num
+    switch (local_num) {
+        case 0: emit_pre_pop_reg(emit, REG_LOCAL_1); break; // locals 0..2 are kept in registers
+        case 1: emit_pre_pop_reg(emit, REG_LOCAL_2); break;
+        case 2: emit_pre_pop_reg(emit, REG_LOCAL_3); break;
+        default: // any other local goes through REG_R0 into its memory slot
+            emit_pre_pop_reg(emit, REG_R0);
+            asm_thumb_mov_local_reg(emit->as, local_num - 1, REG_R0);
+            break;
+    }
+    emit_post(emit);
+}
+
+void emit_store_name(emitter_t *emit, qstr qstr) {
+    emit_pre_pop_reg(emit, REG_ARG_2);
+    emit_call_with_i32_arg(emit, RT_F_STORE_NAME, qstr, REG_ARG_1); // arg1 = name
+    emit_post(emit);
+}
+
+void emit_store_global(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+
+void emit_store_deref(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_store_attr(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_store_locals(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_store_subscr(emitter_t *emit) {
+    emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3); // index, base, value to store
+    emit_call(emit, RT_F_STORE_SUBSCR);
+}
+
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
+    assert(0);
+}
+void emit_delete_name(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_delete_global(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_delete_deref(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_delete_attr(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_delete_subscr(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_dup_top(emitter_t *emit) {
+    emit_pre_pop_reg(emit, REG_R0);
+    emit_post_push_reg_reg(emit, REG_R0, REG_R0);
+}
+
+void emit_dup_top_two(emitter_t *emit) {
+    emit_pre_pop_reg_reg(emit, REG_R0, REG_R1);
+    emit_post_push_reg_reg_reg_reg(emit, REG_R1, REG_R0, REG_R1, REG_R0);
+}
+
+void emit_pop_top(emitter_t *emit) {
+    emit_pre_pop_reg(emit, REG_R0);
+    emit_post(emit);
+}
+
+void emit_rot_two(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_rot_three(emitter_t *emit) {
+    emit_pre_pop_reg_reg_reg(emit, REG_R0, REG_R1, REG_R2);
+    emit_post_push_reg_reg_reg(emit, REG_R0, REG_R2, REG_R1);
+}
+
+void emit_jump(emitter_t *emit, int label) {
+    emit_pre(emit);
+    asm_thumb_b_label(emit->as, label);
+    emit_post(emit);
+}
+
+void emit_pop_jump_if_false(emitter_t *emit, int label) {
+    // pop the condition and branch to label when it is false; either way the value tested is in REG_RET
+    if (emit->do_native_types) {
+        emit_pre_pop_reg(emit, REG_RET); // native value is tested directly
+    } else {
+        emit_pre_pop_reg(emit, REG_ARG_1); // object: ask the runtime for its truth value
+        emit_call(emit, RT_F_IS_TRUE);
+    }
+    // compare-and-branch on REG_RET (zero meaning false, going by the helper's name)
+    asm_thumb_cmp_reg_bz_label(emit->as, REG_RET, label);
+    emit_post(emit);
+}
+
+void emit_pop_jump_if_true(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_jump_if_true_or_pop(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_jump_if_false_or_pop(emitter_t *emit, int label) {
+    assert(0);
+}
+
+void emit_setup_loop(emitter_t *emit, int label) {
+    emit_pre(emit);
+    emit_post(emit);
+}
+
+void emit_break_loop(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_continue_loop(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_setup_with(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_with_cleanup(emitter_t *emit) {
+    assert(0);
+}
+void emit_setup_except(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_setup_finally(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_end_finally(emitter_t *emit) {
+    assert(0);
+}
+void emit_get_iter(emitter_t *emit) {
+    assert(0);
+} // tos = getiter(tos)
+void emit_for_iter(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_for_iter_end(emitter_t *emit) {
+    assert(0);
+}
+void emit_pop_except(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op) {
+    emit_pre_pop_reg(emit, REG_ARG_2);
+    emit_call_with_i32_arg(emit, RT_F_UNARY_OP, op, REG_ARG_1);
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_build_tuple(emitter_t *emit, int n_args) {
+    assert(0);
+}
+
+void emit_build_list(emitter_t *emit, int n_args) {
+    emit_pre(emit);
+    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order
+    emit_call_with_i32_arg(emit, RT_F_BUILD_LIST, n_args, REG_ARG_1);
+    emit_post_push_reg(emit, REG_RET); // new list
+}
+
+void emit_list_append(emitter_t *emit, int list_index) {
+    assert(0);
+}
+
+void emit_build_map(emitter_t *emit, int n_args) {
+    emit_pre(emit);
+    emit_call_with_i32_arg(emit, RT_F_BUILD_MAP, n_args, REG_ARG_1);
+    emit_post_push_reg(emit, REG_RET); // new map
+}
+
+void emit_store_map(emitter_t *emit) {
+    emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1); // key, value, map
+    emit_call(emit, RT_F_STORE_MAP);
+    emit_post_push_reg(emit, REG_RET); // map
+}
+
+void emit_map_add(emitter_t *emit, int map_index) {
+    assert(0);
+}
+
+void emit_build_set(emitter_t *emit, int n_args) {
+    emit_pre(emit);
+    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order
+    emit_call_with_i32_arg(emit, RT_F_BUILD_SET, n_args, REG_ARG_1);
+    emit_post_push_reg(emit, REG_RET); // new set
+}
+
+void emit_set_add(emitter_t *emit, int set_index) {
+    assert(0);
+}
+void emit_build_slice(emitter_t *emit, int n_args) {
+    assert(0);
+}
+void emit_unpack_sequence(emitter_t *emit, int n_args) {
+    assert(0);
+}
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) {
+    assert(0);
+}
+
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+    assert(n_default_params == 0 && n_dict_params == 0);
+    emit_pre(emit);
+    emit_call_with_i32_arg(emit, RT_F_MAKE_FUNCTION_FROM_ID, scope->unique_code_id, REG_ARG_1);
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+    assert(0);
+}
+
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    if (n_positional == 0) {
+        emit_pre_pop_reg(emit, REG_ARG_1); // the function
+        emit_call(emit, RT_F_CALL_FUNCTION_0);
+    } else if (n_positional == 1) {
+        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); // the single argument, the function
+        emit_call(emit, RT_F_CALL_FUNCTION_1);
+    } else if (n_positional == 2) {
+        emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the second argument, the first argument, the function
+        emit_call(emit, RT_F_CALL_FUNCTION_2);
+    } else {
+        assert(0);
+    }
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    if (n_positional == 0) {
+        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); // the self object (or NULL), the method
+        emit_call(emit, RT_F_CALL_METHOD_1);
+    } else if (n_positional == 1) {
+        emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the first argument, the self object (or NULL), the method
+        emit_call(emit, RT_F_CALL_METHOD_2);
+    } else {
+        assert(0);
+    }
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_pop_block(emitter_t *emit) {
+    emit_pre(emit);
+    emit_post(emit);
+}
+
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { // pop two operands and push the result, which both paths leave in REG_RET
+    if (emit->do_native_types) {
+        assert(op == RT_BINARY_OP_ADD); // the native path only knows how to emit an add
+        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1);
+        asm_thumb_add_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, REG_ARG_2);
+    } else {
+        emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2);
+        emit_call_with_i32_arg(emit, RT_F_BINARY_OP, op, REG_ARG_1); // arg1 = operator code
+    }
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { // pop two operands and push the comparison result, which both paths leave in REG_RET
+    if (emit->do_native_types) {
+        assert(op == RT_COMPARE_OP_LESS); // the ite sequence below only implements "less than"
+        emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1);
+        asm_thumb_cmp_reg_reg(emit->as, REG_ARG_1, REG_ARG_2);
+        asm_thumb_ite_ge(emit->as);
+        asm_thumb_mov_reg_i8(emit->as, REG_RET, 0); // if r0 >= r1
+        asm_thumb_mov_reg_i8(emit->as, REG_RET, 1); // if r0 < r1
+    } else {
+        emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2);
+        emit_call_with_i32_arg(emit, RT_F_COMPARE_OP, op, REG_ARG_1); // arg1 = operator code
+    }
+    emit_post_push_reg(emit, REG_RET);
+}
+
+void emit_return_value(emitter_t *emit) {
+    emit_pre_pop_reg(emit, REG_RET);
+    emit->last_emit_was_return_value = true;
+    //asm_thumb_call_ind(emit->as, 0, REG_R0); to seg fault for debugging with gdb
+    asm_thumb_exit(emit->as);
+}
+
+void emit_raise_varargs(emitter_t *emit, int n_args) {
+    assert(0);
+}
+void emit_yield_value(emitter_t *emit) {
+    assert(0);
+}
+void emit_yield_from(emitter_t *emit) {
+    assert(0);
+}
+
+#endif // EMIT_DO_THUMB
diff --git a/py/emitx64.c b/py/emitx64.c
new file mode 100644
index 0000000..da4c7e3
--- /dev/null
+++ b/py/emitx64.c
@@ -0,0 +1,680 @@
+/* This code is equivalent to the push/pop-based x64 emitter but pre-allocates stack
+ * space and uses mov instead of push/pop instructions to access
+ * the temporary stack.  It runs in similar time, but uses 3*n
+ * more bytes, where n is number of push/pop instructions.
+ *
+ * This code is preferred because it keeps the stack aligned on a
+ * 16 byte boundary.
+ *
+ * Improvements:
+ *  Doesn't call stub functions, does all the work inline.
+ *  Has optimisations for loading i64s to stack.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+#include "runtime.h"
+#include "emit.h"
+#include "asmx64.h"
+
+#ifdef EMIT_DO_X64
+
+#define REG_LOCAL_1 (REG_RBX)
+#define REG_LOCAL_NUM (1)
+
+typedef enum { // what, if anything, the previous emit left as a deferred push
+    NEED_TO_PUSH_NOTHING, // no push pending
+    NEED_TO_PUSH_R64, // the register recorded in last_r64 is pending
+    NEED_TO_PUSH_I64, // the immediate recorded in last_i64 is pending
+} need_to_push_t;
+
+struct _emitter_t {
+    int pass; // current compiler pass, set in emit_start_pass
+    int stack_start; // frame slot index of the first temporary-stack entry (memory-held locals come before it)
+    int stack_size; // number of items currently stored on the temporary stack (a deferred push is not counted)
+    bool last_emit_was_return_value; // true only directly after emit_return_value
+    need_to_push_t need_to_push; // deferred push left by the last emit, if any
+    int last_r64; // register of the deferred push (NEED_TO_PUSH_R64)
+    int64_t last_i64; // immediate of the deferred push (NEED_TO_PUSH_I64)
+
+    scope_t *scope; // scope being compiled in the current pass
+
+    asm_x64_t *as; // assembler that receives the generated instructions
+    bool do_native_types; // false: small ints are pushed in tagged form; true: raw values and inline add/compare
+};
+
+emitter_t *emit_new() { // allocate an emitter with its own x64 assembler; native types start disabled
+    emitter_t *em = m_new(emitter_t, 1);
+    em->do_native_types = false;
+    em->as = asm_x64_new();
+    return em; // the per-pass fields are filled in by emit_start_pass
+}
+
+void emit_set_native_types(emitter_t *emit, bool do_native_types) {
+    emit->do_native_types = do_native_types;
+}
+
+void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) {
+    // reset the per-pass emitter state
+    emit->scope = scope;
+    emit->pass = pass;
+    emit->need_to_push = NEED_TO_PUSH_NOTHING;
+    emit->last_emit_was_return_value = false;
+    emit->stack_size = 0;
+    emit->stack_start = 0;
+    if (pass == PASS_1) {
+        // a code id is requested for the scope in the first pass only
+        scope->unique_code_id = rt_get_new_unique_code_id();
+    }
+
+    asm_x64_start_pass(emit->as, pass);
+
+    // function entry: after PASS_1 the frame holds the memory-held locals
+    // followed by the temporary stack; in PASS_1 it is given zero slots
+    int frame_slots = 0;
+    if (pass > PASS_1) {
+        const int mem_locals = scope->num_locals - REG_LOCAL_NUM; // locals without a register
+        emit->stack_start = mem_locals > 0 ? mem_locals : 0;
+        frame_slots = emit->stack_start + scope->stack_size;
+    }
+    asm_x64_entry(emit->as, frame_slots);
+
+    // copy each incoming argument to the place where its local lives
+    for (int param = 0; param < scope->num_params; param++) {
+        switch (param) {
+            case 0: asm_x64_mov_r64_to_r64(emit->as, REG_ARG_1, REG_LOCAL_1); break; // local 0 is register-held
+            // the next two go to frame slot param - 1
+            case 1: asm_x64_mov_r64_to_local(emit->as, REG_ARG_2, param - 1); break;
+            case 2: asm_x64_mov_r64_to_local(emit->as, REG_ARG_3, param - 1); break;
+            default:
+                // TODO not implemented
+                assert(0);
+                break;
+        }
+    }
+}
+
+void emit_end_pass(emitter_t *emit) {
+    if (!emit->last_emit_was_return_value) { // the code did not end with a return: emit the exit sequence here
+        asm_x64_exit(emit->as);
+    }
+    asm_x64_end_pass(emit->as);
+
+    // check stack is back to zero size
+    if (emit->stack_size != 0) {
+        printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size);
+    }
+
+    if (emit->pass == PASS_3) { // only in PASS_3 is the generated code handed to the runtime
+        py_fun_t f = asm_x64_get_code(emit->as);
+        rt_assign_native_code(emit->scope->unique_code_id, f, asm_x64_get_code_size(emit->as), emit->scope->num_params);
+    }
+}
+
+bool emit_last_emit_was_return_value(emitter_t *emit) {
+    return emit->last_emit_was_return_value;
+}
+
+int emit_get_stack_size(emitter_t *emit) {
+    return emit->stack_size;
+}
+
+void emit_set_stack_size(emitter_t *emit, int size) {
+    emit->stack_size = size;
+}
+
+static void adjust_stack(emitter_t *emit, int stack_size_delta) { // change the tracked stack depth by stack_size_delta
+    emit->stack_size += stack_size_delta;
+    assert(emit->stack_size >= 0);
+    if (emit->pass > PASS_1 && emit->stack_size > emit->scope->stack_size) { // after PASS_1, record a new maximum depth in the scope
+        emit->scope->stack_size = emit->stack_size;
+    }
+}
+
+static void stack_settle(emitter_t *emit) {
+    // Materialise a deferred push, if there is one, by storing the value to
+    // the next free stack slot; afterwards nothing is pending.
+    const need_to_push_t pending = emit->need_to_push;
+    if (pending == NEED_TO_PUSH_R64 || pending == NEED_TO_PUSH_I64) {
+        int src_r64;
+        if (pending == NEED_TO_PUSH_I64) {
+            // an immediate is loaded into RAX first and stored from there
+            asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, REG_RAX);
+            src_r64 = REG_RAX;
+        } else {
+            src_r64 = emit->last_r64;
+        }
+        asm_x64_mov_r64_to_local(emit->as, src_r64, emit->stack_start + emit->stack_size);
+        adjust_stack(emit, 1);
+    }
+    emit->need_to_push = NEED_TO_PUSH_NOTHING;
+}
+
+static void emit_pre_raw(emitter_t *emit, int stack_size_delta) { // common prologue of an emit: apply the stack delta and clear the return-value flag
+    adjust_stack(emit, stack_size_delta);
+    emit->last_emit_was_return_value = false;
+}
+
+static void emit_pre(emitter_t *emit) { // prologue for emits that pop nothing: first flush any deferred push to its stack slot
+    stack_settle(emit);
+    emit_pre_raw(emit, 0);
+}
+
+static void emit_pre_pop_r64(emitter_t *emit, int r64) { // pop the top of stack into r64, taking a deferred push directly when one is pending
+    switch (emit->need_to_push) {
+        case NEED_TO_PUSH_NOTHING: // value is in memory: load the top slot and shrink the stack
+            asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64);
+            emit_pre_raw(emit, -1);
+            break;
+
+        case NEED_TO_PUSH_R64: // value is still in a register and was never counted in stack_size
+            emit_pre_raw(emit, 0);
+            if (emit->last_r64 != r64) {
+                asm_x64_mov_r64_to_r64(emit->as, emit->last_r64, r64);
+            }
+            break;
+
+        case NEED_TO_PUSH_I64: // value is a pending immediate: load it straight into r64
+            emit_pre_raw(emit, 0);
+            asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, r64);
+            break;
+    }
+    emit->need_to_push = NEED_TO_PUSH_NOTHING;
+}
+
+static void emit_pre_pop_r64_r64(emitter_t *emit, int r64a, int r64b) { // pop two items: r64a receives the top of stack, r64b the item below it
+    emit_pre_pop_r64(emit, r64a);
+    asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64b); // after the first pop the second item is the top slot
+    adjust_stack(emit, -1);
+}
+
+static void emit_pre_pop_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) {
+    // pop three items: r64a receives the top of stack, r64b the one below it, r64c the third
+    emit_pre_pop_r64_r64(emit, r64a, r64b);
+    asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64c);
+    adjust_stack(emit, -1);
+}
+
+static void emit_post(emitter_t *emit) {
+}
+
+static void emit_post_push_r64(emitter_t *emit, int r64) { // defer the push: only remember which register holds the value
+    emit->last_r64 = r64;
+    emit->need_to_push = NEED_TO_PUSH_R64;
+}
+
+static void emit_post_push_i64(emitter_t *emit, int64_t i64) { // defer the push: only remember the immediate value
+    emit->last_i64 = i64;
+    emit->need_to_push = NEED_TO_PUSH_I64;
+}
+
+static void emit_post_push_r64_r64(emitter_t *emit, int r64a, int r64b) {
+    // r64a is stored to the next free stack slot right away; r64b is left as the deferred push
+    asm_x64_mov_r64_to_local(emit->as, r64a, emit->stack_start + emit->stack_size);
+    emit_post_push_r64(emit, r64b);
+    adjust_stack(emit, 1);
+}
+
+static void emit_post_push_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) {
+    asm_x64_mov_r64_to_local(emit->as, r64a, emit->stack_start + emit->stack_size);
+    asm_x64_mov_r64_to_local(emit->as, r64b, emit->stack_start + emit->stack_size + 1);
+    asm_x64_mov_r64_to_local(emit->as, r64c, emit->stack_start + emit->stack_size + 2);
+    adjust_stack(emit, 3);
+}
+
+static void emit_post_push_r64_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c, int r64d) { // store four registers to consecutive stack slots; r64d ends up on top
+    const int srcs[4] = {r64a, r64b, r64c, r64d};
+    for (int i = 0; i < 4; i++) {
+        asm_x64_mov_r64_to_local(emit->as, srcs[i], emit->stack_start + emit->stack_size + i);
+    }
+    adjust_stack(emit, 4);
+}
+
+static void emit_get_stack_pointer_to_r64_for_pop(emitter_t *emit, int r64, int n_pop) { // r64 = address of the current top slot, then drop n_pop items from the tracked stack
+    asm_x64_mov_local_addr_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64); // callers describe this as a pointer to the items in reverse order
+    adjust_stack(emit, -n_pop);
+}
+
+static void emit_get_stack_pointer_to_r64_for_push(emitter_t *emit, int r64, int n_push) { // r64 = address of the slot that becomes the new top, then grow the tracked stack by n_push
+    asm_x64_mov_local_addr_to_r64(emit->as, emit->stack_start + emit->stack_size + n_push - 1, r64); // presumably the callee fills the n_push slots through this pointer -- confirm against the runtime
+    adjust_stack(emit, n_push);
+}
+
+static void emit_call(emitter_t *emit, void *fun) { // emit an indirect call to the runtime function fun
+    asm_x64_call_ind(emit->as, fun, REG_RAX); // NOTE(review): RAX looks like the scratch register for the target address -- confirm in asmx64
+}
+
+static void emit_call_with_i64_arg(emitter_t *emit, void *fun, int64_t arg_val, int arg_r64) { // load arg_val into arg_r64, then make the runtime call
+    asm_x64_mov_i64_to_r64_optimised(emit->as, arg_val, arg_r64);
+    emit_call(emit, fun);
+}
+
+int emit_label_new(emitter_t *emit) {
+    return asm_x64_label_new(emit->as);
+}
+
+void emit_label_assign(emitter_t *emit, int l) {
+    asm_x64_label_assign(emit->as, l);
+}
+
+void emit_import_name(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_import_from(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_import_star(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { // push the constant object for None, False or True
+    emit_pre(emit);
+    py_obj_t o;
+    switch (tok) {
+        case PY_TOKEN_KW_NONE: o = py_const_none; break;
+        case PY_TOKEN_KW_FALSE: o = py_const_false; break;
+        case PY_TOKEN_KW_TRUE: o = py_const_true; break;
+        default: assert(0); o = 0; // shouldn't happen; o still gets a value so it is never read uninitialised when asserts are compiled out
+    }
+    emit_post_push_i64(emit, (uint64_t)o); // the object value itself becomes the pushed immediate
+}
+
+void emit_load_const_small_int(emitter_t *emit, int arg) { // push the integer constant arg
+    emit_pre(emit);
+    if (emit->do_native_types) {
+        emit_post_push_i64(emit, arg); // native mode: the raw value
+    } else { // otherwise the tagged form 2*arg + 1, widened first so the 32-bit int cannot overflow and no negative value is shifted
+        emit_post_push_i64(emit, (int64_t)arg * 2 + 1);
+    }
+}
+
+void emit_load_const_int(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_load_const_dec(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_load_const_id(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+
+void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) {
+    emit_pre(emit);
+    emit_call_with_i64_arg(emit, rt_load_const_str, qstr, REG_ARG_1);
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_load_const_verbatim_start(emitter_t *emit) {
+    assert(0);
+}
+void emit_load_const_verbatim_int(emitter_t *emit, int val) {
+    assert(0);
+}
+void emit_load_const_verbatim_str(emitter_t *emit, const char *str) {
+    assert(0);
+}
+void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) {
+    assert(0);
+}
+void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) {
+    assert(0);
+}
+void emit_load_const_verbatim_end(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) {
+    // push local variable local_num; local 0 lives in REG_LOCAL_1, the others in frame slot local_num - 1
+    emit_pre(emit);
+    int value_r64 = REG_LOCAL_1;
+    if (local_num != 0) {
+        value_r64 = REG_RAX; // fetch the local from memory via RAX
+        asm_x64_mov_local_to_r64(emit->as, local_num - 1, REG_RAX);
+    }
+    emit_post_push_r64(emit, value_r64);
+}
+
+void emit_load_name(emitter_t *emit, qstr qstr) {
+    emit_pre(emit);
+    emit_call_with_i64_arg(emit, rt_load_name, qstr, REG_ARG_1);
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_load_global(emitter_t *emit, qstr qstr) {
+    emit_pre(emit);
+    emit_call_with_i64_arg(emit, rt_load_global, qstr, REG_ARG_1);
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_load_deref(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_load_closure(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+
+void emit_load_attr(emitter_t *emit, qstr qstr) {
+    emit_pre_pop_r64(emit, REG_ARG_1); // arg1 = base
+    emit_call_with_i64_arg(emit, rt_load_attr, qstr, REG_ARG_2); // arg2 = attribute name
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_load_method(emitter_t *emit, qstr qstr) { // pop the base object and reserve two stack slots for what rt_load_method produces
+    emit_pre_pop_r64(emit, REG_ARG_1); // arg1 = base
+    emit_get_stack_pointer_to_r64_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr (presumably rt_load_method writes both slots through it -- confirm)
+    emit_call_with_i64_arg(emit, rt_load_method, qstr, REG_ARG_2); // arg2 = method name
+}
+
+void emit_load_build_class(emitter_t *emit) {
+   assert(0);
+} // basically load __build_class__ from builtins
+
+void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) {
+    // pop the top of stack into local variable local_num
+    if (local_num != 0) {
+        emit_pre_pop_r64(emit, REG_RAX); // memory-held local: pop into RAX, then store to its frame slot
+        asm_x64_mov_r64_to_local(emit->as, REG_RAX, local_num - 1);
+    } else {
+        emit_pre_pop_r64(emit, REG_LOCAL_1); // local 0 is register-held: pop straight into it
+    }
+    emit_post(emit);
+}
+
+void emit_store_name(emitter_t *emit, qstr qstr) {
+    emit_pre_pop_r64(emit, REG_ARG_2);
+    emit_call_with_i64_arg(emit, rt_store_name, qstr, REG_ARG_1); // arg1 = name
+    emit_post(emit);
+}
+
+void emit_store_global(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+
+void emit_store_deref(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_store_attr(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_store_locals(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_store_subscr(emitter_t *emit) {
+    emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3); // index, base, value to store
+    emit_call(emit, rt_store_subscr);
+}
+
+void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) {
+    assert(0);
+}
+void emit_delete_name(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_delete_global(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_delete_deref(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_delete_attr(emitter_t *emit, qstr qstr) {
+    assert(0);
+}
+void emit_delete_subscr(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_dup_top(emitter_t *emit) {
+    emit_pre_pop_r64(emit, REG_RAX);
+    emit_post_push_r64_r64(emit, REG_RAX, REG_RAX);
+}
+
+void emit_dup_top_two(emitter_t *emit) {
+    emit_pre_pop_r64_r64(emit, REG_RAX, REG_RDI);
+    emit_post_push_r64_r64_r64_r64(emit, REG_RDI, REG_RAX, REG_RDI, REG_RAX);
+}
+
+void emit_pop_top(emitter_t *emit) {
+    emit_pre_pop_r64(emit, REG_RAX);
+    emit_post(emit);
+}
+
+void emit_rot_two(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_rot_three(emitter_t *emit) { // rotate the top three items: the old top goes to the third position, the other two move up one
+    emit_pre_pop_r64_r64_r64(emit, REG_RAX, REG_RDI, REG_RSI); // RAX = top, RDI = second, RSI = third
+    emit_post_push_r64_r64_r64(emit, REG_RAX, REG_RSI, REG_RDI); // stored bottom-up: old top, old third, old second
+}
+
+void emit_jump(emitter_t *emit, int label) {
+    emit_pre(emit);
+    asm_x64_jmp_label(emit->as, label);
+    emit_post(emit);
+}
+
+void emit_pop_jump_if_false(emitter_t *emit, int label) {
+    // pop the condition and jump to label when it is false; either way the value tested is in REG_RET
+    if (emit->do_native_types) {
+        // NOTE(review): going by the name of asm_x64_test_r8_with_r8 only 8 bits of the native value are tested -- confirm
+        emit_pre_pop_r64(emit, REG_RET);
+    } else {
+        emit_pre_pop_r64(emit, REG_ARG_1);
+        emit_call(emit, rt_is_true); // the truth value is then read from REG_RET
+    }
+    // test the value against itself and take the JCC_JZ jump to label
+    asm_x64_test_r8_with_r8(emit->as, REG_RET, REG_RET);
+    asm_x64_jcc_label(emit->as, JCC_JZ, label);
+    emit_post(emit);
+}
+
+void emit_pop_jump_if_true(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_jump_if_true_or_pop(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_jump_if_false_or_pop(emitter_t *emit, int label) {
+    assert(0);
+}
+
+void emit_setup_loop(emitter_t *emit, int label) {
+    emit_pre(emit);
+    emit_post(emit);
+}
+
+void emit_break_loop(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_continue_loop(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_setup_with(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_with_cleanup(emitter_t *emit) {
+    assert(0);
+}
+void emit_setup_except(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_setup_finally(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_end_finally(emitter_t *emit) {
+    assert(0);
+}
+void emit_get_iter(emitter_t *emit) {
+    assert(0);
+} // tos = getiter(tos)
+void emit_for_iter(emitter_t *emit, int label) {
+    assert(0);
+}
+void emit_for_iter_end(emitter_t *emit) {
+    assert(0);
+}
+void emit_pop_except(emitter_t *emit) {
+    assert(0);
+}
+
+void emit_unary_op(emitter_t *emit, rt_unary_op_t op) {
+    emit_pre_pop_r64(emit, REG_ARG_2);
+    emit_call_with_i64_arg(emit, rt_unary_op, op, REG_ARG_1);
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_build_tuple(emitter_t *emit, int n_args) {
+    assert(0);
+}
+
+void emit_build_list(emitter_t *emit, int n_args) {
+    emit_pre(emit);
+    emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order
+    emit_call_with_i64_arg(emit, rt_build_list, n_args, REG_ARG_1);
+    emit_post_push_r64(emit, REG_RET); // new list
+}
+
+void emit_list_append(emitter_t *emit, int list_index) {
+    assert(0);
+}
+
+void emit_build_map(emitter_t *emit, int n_args) {
+    emit_pre(emit);
+    emit_call_with_i64_arg(emit, rt_build_map, n_args, REG_ARG_1);
+    emit_post_push_r64(emit, REG_RET); // new map
+}
+
+void emit_store_map(emitter_t *emit) {
+    emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1); // key, value, map
+    emit_call(emit, rt_store_map);
+    emit_post_push_r64(emit, REG_RET); // map
+}
+
+void emit_map_add(emitter_t *emit, int map_index) {
+    assert(0);
+}
+
+void emit_build_set(emitter_t *emit, int n_args) {
+    emit_pre(emit);
+    emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order
+    emit_call_with_i64_arg(emit, rt_build_set, n_args, REG_ARG_1);
+    emit_post_push_r64(emit, REG_RET); // new set
+}
+
+void emit_set_add(emitter_t *emit, int set_index) {
+    assert(0);
+}
+void emit_build_slice(emitter_t *emit, int n_args) {
+    assert(0);
+}
+void emit_unpack_sequence(emitter_t *emit, int n_args) {
+    assert(0);
+}
+void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) {
+    assert(0);
+}
+
+void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+    assert(n_default_params == 0 && n_dict_params == 0);
+    emit_pre(emit);
+    emit_call_with_i64_arg(emit, rt_make_function_from_id, scope->unique_code_id, REG_ARG_1);
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) {
+    assert(0);
+}
+
+void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    if (n_positional == 0) {
+        emit_pre_pop_r64(emit, REG_ARG_1); // the function
+        emit_call(emit, rt_call_function_0);
+    } else if (n_positional == 1) {
+        emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_ARG_1); // the single argument, the function
+        emit_call(emit, rt_call_function_1);
+    } else if (n_positional == 2) {
+        emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the second argument, the first argument, the function
+        emit_call(emit, rt_call_function_2);
+    } else {
+        assert(0);
+    }
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
+    assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    if (n_positional == 0) {
+        emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_ARG_1); // the self object (or NULL), the method
+        emit_call(emit, rt_call_method_1);
+    } else if (n_positional == 1) {
+        emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the first argument, the self object (or NULL), the method
+        emit_call(emit, rt_call_method_2);
+    } else {
+        assert(0);
+    }
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_pop_block(emitter_t *emit) {
+    emit_pre(emit);
+    emit_post(emit);
+}
+
+void emit_binary_op(emitter_t *emit, rt_binary_op_t op) {
+    // pop two operands and push the result, which both paths leave in REG_RET
+    if (!emit->do_native_types) {
+        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2);
+        emit_call_with_i64_arg(emit, rt_binary_op, op, REG_ARG_1); // arg1 = operator code
+    } else {
+        assert(op == RT_BINARY_OP_ADD); // the native path can only emit an add
+        emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_RET);
+        asm_x64_add_r64_to_r64(emit->as, REG_ARG_2, REG_RET);
+    }
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_compare_op(emitter_t *emit, rt_compare_op_t op) {
+    // pop two operands and push the comparison result, which both paths leave in REG_RET
+    if (!emit->do_native_types) {
+        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2);
+        emit_call_with_i64_arg(emit, rt_compare_op, op, REG_ARG_1); // arg1 = operator code
+    } else {
+        assert(op == RT_COMPARE_OP_LESS); // the native path can only emit "less than"
+        emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2);
+        asm_x64_xor_r64_to_r64(emit->as, REG_RET, REG_RET); // REG_RET xor-ed with itself before the setcc below
+        asm_x64_cmp_r64_with_r64(emit->as, REG_ARG_3, REG_ARG_2);
+        asm_x64_setcc_r8(emit->as, JCC_JL, REG_RET);
+    }
+    emit_post_push_r64(emit, REG_RET);
+}
+
+void emit_return_value(emitter_t *emit) { // pop the return value into RAX and emit the function exit
+    emit_pre_pop_r64(emit, REG_RAX);
+    emit->last_emit_was_return_value = true; // must follow the pop, which resets this flag via emit_pre_raw
+    //asm_x64_call_ind(emit->as, 0, REG_RAX); to seg fault for debugging with gdb
+    asm_x64_exit(emit->as);
+}
+
+void emit_raise_varargs(emitter_t *emit, int n_args) {
+    assert(0);
+}
+void emit_yield_value(emitter_t *emit) {
+    assert(0);
+}
+void emit_yield_from(emitter_t *emit) {
+    assert(0);
+}
+
+#endif // EMIT_DO_X64
diff --git a/py/grammar.h b/py/grammar.h
new file mode 100644
index 0000000..05bb237
--- /dev/null
+++ b/py/grammar.h
@@ -0,0 +1,300 @@
+// rules for writing rules:
+// - zero_or_more is implemented using opt_rule around a one_or_more rule
+// - don't put opt_rule in arguments of or rule; instead, wrap the call to this or rule in opt_rule
+
+// # Start symbols for the grammar:
+// #       single_input is a single interactive statement;
+// #       file_input is a module or sequence of commands read from an input file;
+// #       eval_input is the input for the eval() functions.
+// # NB: compound_stmt in single_input is followed by extra NEWLINE!
+// single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+// file_input: (NEWLINE | stmt)* ENDMARKER
+// eval_input: testlist NEWLINE* ENDMARKER
+
+DEF_RULE(file_input, nc, and(1), opt_rule(file_input_2))
+DEF_RULE(file_input_2, c(generic_all_nodes), one_or_more, rule(file_input_3))
+DEF_RULE(file_input_3, nc, or(2), tok(NEWLINE), rule(stmt))
+
+// decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+// decorators: decorator+
+// decorated: decorators (classdef | funcdef)
+// funcdef: 'def' NAME parameters ['->' test] ':' suite
+// parameters: '(' [typedargslist] ')'
+// typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* [',' ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef
+// tfpdef: NAME [':' test]
+// varargslist: vfpdef ['=' test] (',' vfpdef ['=' test])* [',' ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] |  '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef
+// vfpdef: NAME
+
+DEF_RULE(decorator, nc, and(4), tok(DEL_AT), rule(dotted_name), opt_rule(trailer_paren), tok(NEWLINE))
+//DEF_RULE(decorator_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE))
+DEF_RULE(decorators, nc, one_or_more, rule(decorator))
+DEF_RULE(decorated, c(decorated), and(2), rule(decorators), rule(decorated_body))
+DEF_RULE(decorated_body, nc, or(2), rule(classdef), rule(funcdef))
+DEF_RULE(funcdef, c(funcdef), and(8), tok(KW_DEF), tok(NAME), tok(DEL_PAREN_OPEN), opt_rule(typedargslist), tok(DEL_PAREN_CLOSE), opt_rule(funcdef_2), tok(DEL_COLON), rule(suite))
+DEF_RULE(funcdef_2, nc, and(2), tok(DEL_MINUS_MORE), rule(test))
+// TODO typedargslist lets through more than is allowed
+DEF_RULE(typedargslist, nc, list_with_end, rule(typedargslist_item), tok(DEL_COMMA))
+DEF_RULE(typedargslist_item, nc, or(3), rule(typedargslist_name), rule(typedargslist_star), rule(typedargslist_dbl_star))
+DEF_RULE(typedargslist_name, nc, and(3), tok(NAME), opt_rule(typedargslist_colon), opt_rule(typedargslist_equal))
+DEF_RULE(typedargslist_star, nc, and(2), tok(OP_STAR), opt_rule(tfpdef))
+DEF_RULE(typedargslist_dbl_star, nc, and(3), tok(OP_DBL_STAR), tok(NAME), opt_rule(typedargslist_colon))
+DEF_RULE(typedargslist_colon, nc, and(2), tok(DEL_COLON), rule(test))
+DEF_RULE(typedargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE(tfpdef, nc, and(2), tok(NAME), opt_rule(typedargslist_colon))
+// TODO varargslist lets through more than is allowed
+DEF_RULE(varargslist, nc, list_with_end, rule(varargslist_item), tok(DEL_COMMA))
+DEF_RULE(varargslist_item, nc, or(3), rule(varargslist_name), rule(varargslist_star), rule(varargslist_dbl_star))
+DEF_RULE(varargslist_name, nc, and(2), tok(NAME), opt_rule(varargslist_equal))
+DEF_RULE(varargslist_star, nc, and(2), tok(OP_STAR), opt_rule(vfpdef))
+DEF_RULE(varargslist_dbl_star, nc, and(2), tok(OP_DBL_STAR), tok(NAME))
+DEF_RULE(varargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE(vfpdef, nc, and(1), tok(NAME))
+
+// stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | simple_stmt
+
+DEF_RULE(stmt, nc, or(9), rule(if_stmt), rule(while_stmt), rule(for_stmt), rule(try_stmt), rule(with_stmt), rule(funcdef), rule(classdef), rule(decorated), rule(simple_stmt))
+
+// simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+
+DEF_RULE(simple_stmt, nc, and(2), rule(simple_stmt_2), tok(NEWLINE))
+DEF_RULE(simple_stmt_2, c(generic_all_nodes), list_with_end, rule(small_stmt), tok(DEL_SEMICOLON))
+
+// small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
+// expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist_star_expr))*)
+// testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+// augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//='
+// # For normal assignments, additional restrictions enforced by the interpreter
+
+DEF_RULE(small_stmt, nc, or(8), rule(del_stmt), rule(pass_stmt), rule(flow_stmt), rule(import_stmt), rule(global_stmt), rule(nonlocal_stmt), rule(assert_stmt), rule(expr_stmt))
+DEF_RULE(expr_stmt, c(expr_stmt), and(2), rule(testlist_star_expr), opt_rule(expr_stmt_2))
+DEF_RULE(expr_stmt_2, nc, or(2), rule(expr_stmt_augassign), rule(expr_stmt_assign_list))
+DEF_RULE(expr_stmt_augassign, nc, and(2), rule(augassign), rule(expr_stmt_6))
+DEF_RULE(expr_stmt_assign_list, nc, one_or_more, rule(expr_stmt_assign))
+DEF_RULE(expr_stmt_assign, nc, and(2), tok(DEL_EQUAL), rule(expr_stmt_6))
+DEF_RULE(expr_stmt_6, nc, or(2), rule(yield_expr), rule(testlist_star_expr))
+DEF_RULE(testlist_star_expr, c(generic_tuple), list_with_end, rule(testlist_star_expr_2), tok(DEL_COMMA))
+DEF_RULE(testlist_star_expr_2, nc, or(2), rule(star_expr), rule(test))
+DEF_RULE(augassign, nc, or(12), tok(DEL_PLUS_EQUAL), tok(DEL_MINUS_EQUAL), tok(DEL_STAR_EQUAL), tok(DEL_SLASH_EQUAL), tok(DEL_PERCENT_EQUAL), tok(DEL_AMPERSAND_EQUAL), tok(DEL_PIPE_EQUAL), tok(DEL_CARET_EQUAL), tok(DEL_DBL_LESS_EQUAL), tok(DEL_DBL_MORE_EQUAL), tok(DEL_DBL_STAR_EQUAL), tok(DEL_DBL_SLASH_EQUAL))
+
+// del_stmt: 'del' exprlist
+// pass_stmt: 'pass'
+// flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+// break_stmt: 'break'
+// continue_stmt: 'continue'
+// return_stmt: 'return' [testlist]
+// yield_stmt: yield_expr
+// raise_stmt: 'raise' [test ['from' test]]
+
+DEF_RULE(del_stmt, c(del_stmt), and(2), tok(KW_DEL), rule(exprlist))
+DEF_RULE(pass_stmt, c(generic_all_nodes), and(1), tok(KW_PASS))
+DEF_RULE(flow_stmt, nc, or(5), rule(break_stmt), rule(continue_stmt), rule(return_stmt), rule(raise_stmt), rule(yield_stmt))
+DEF_RULE(break_stmt, c(break_stmt), and(1), tok(KW_BREAK))
+DEF_RULE(continue_stmt, c(continue_stmt), and(1), tok(KW_CONTINUE))
+DEF_RULE(return_stmt, c(return_stmt), and(2), tok(KW_RETURN), opt_rule(testlist))
+DEF_RULE(yield_stmt, c(yield_stmt), and(1), rule(yield_expr))
+DEF_RULE(raise_stmt, c(raise_stmt), and(2), tok(KW_RAISE), opt_rule(raise_stmt_arg))
+DEF_RULE(raise_stmt_arg, nc, and(2), rule(test), opt_rule(raise_stmt_from))
+DEF_RULE(raise_stmt_from, nc, and(2), tok(KW_FROM), rule(test))
+
+// import_stmt: import_name | import_from
+// import_name: 'import' dotted_as_names
+// import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' ('*' | '(' import_as_names ')' | import_as_names)
+// import_as_name: NAME ['as' NAME]
+// dotted_as_name: dotted_name ['as' NAME]
+// import_as_names: import_as_name (',' import_as_name)* [',']
+// dotted_as_names: dotted_as_name (',' dotted_as_name)*
+// dotted_name: NAME ('.' NAME)*
+// global_stmt: 'global' NAME (',' NAME)*
+// nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
+// assert_stmt: 'assert' test [',' test]
+
+DEF_RULE(import_stmt, nc, or(2), rule(import_name), rule(import_from))
+DEF_RULE(import_name, c(import_name), and(2), tok(KW_IMPORT), rule(dotted_as_names))
+DEF_RULE(import_from, c(import_from), and(4), tok(KW_FROM), rule(import_from_2), tok(KW_IMPORT), rule(import_from_3))
+DEF_RULE(import_from_2, nc, or(2), rule(dotted_name), rule(import_from_2b))
+DEF_RULE(import_from_2b, nc, and(2), rule(one_or_more_period_or_ellipses), opt_rule(dotted_name))
+DEF_RULE(import_from_3, nc, or(3), tok(OP_STAR), rule(import_as_names_paren), rule(import_as_names))
+DEF_RULE(import_as_names_paren, nc, and(3), tok(DEL_PAREN_OPEN), rule(import_as_names), tok(DEL_PAREN_CLOSE))
+DEF_RULE(one_or_more_period_or_ellipses, nc, one_or_more, rule(period_or_ellipses))
+DEF_RULE(period_or_ellipses, nc, or(2), tok(DEL_PERIOD), tok(ELLIPSES))
+DEF_RULE(import_as_name, nc, and(2), tok(NAME), opt_rule(as_name))
+DEF_RULE(dotted_as_name, nc, and(2), rule(dotted_name), opt_rule(as_name))
+DEF_RULE(as_name, nc, and(2), tok(KW_AS), tok(NAME))
+DEF_RULE(import_as_names, nc, list_with_end, rule(import_as_name), tok(DEL_COMMA))
+DEF_RULE(dotted_as_names, nc, list, rule(dotted_as_name), tok(DEL_COMMA))
+DEF_RULE(dotted_name, nc, list, tok(NAME), tok(DEL_PERIOD))
+DEF_RULE(global_stmt, c(global_stmt), and(2), tok(KW_GLOBAL), rule(name_list))
+DEF_RULE(nonlocal_stmt, c(nonlocal_stmt), and(2), tok(KW_NONLOCAL), rule(name_list))
+DEF_RULE(name_list, nc, list, tok(NAME), tok(DEL_COMMA))
+DEF_RULE(assert_stmt, c(assert_stmt), and(3), tok(KW_ASSERT), rule(test), opt_rule(assert_stmt_extra))
+DEF_RULE(assert_stmt_extra, nc, and(2), tok(DEL_COMMA), rule(test))
+
+// if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+// while_stmt: 'while' test ':' suite ['else' ':' suite]
+// for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+// try_stmt: 'try' ':' suite ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] | 'finally' ':' suite)
+// # NB compile.c makes sure that the default except clause is last
+// except_clause: 'except' [test ['as' NAME]]
+// with_stmt: 'with' with_item (',' with_item)* ':' suite
+// with_item: test ['as' expr]
+// suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+DEF_RULE(if_stmt, c(if_stmt), and(6), tok(KW_IF), rule(test), tok(DEL_COLON), rule(suite), opt_rule(if_stmt_elif_list), opt_rule(else_stmt))
+DEF_RULE(if_stmt_elif_list, nc, one_or_more, rule(if_stmt_elif))
+DEF_RULE(if_stmt_elif, nc, and(4), tok(KW_ELIF), rule(test), tok(DEL_COLON), rule(suite))
+DEF_RULE(while_stmt, c(while_stmt), and(5), tok(KW_WHILE), rule(test), tok(DEL_COLON), rule(suite), opt_rule(else_stmt))
+DEF_RULE(for_stmt, c(for_stmt), and(7), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(testlist), tok(DEL_COLON), rule(suite), opt_rule(else_stmt))
+DEF_RULE(try_stmt, c(try_stmt), and(4), tok(KW_TRY), tok(DEL_COLON), rule(suite), rule(try_stmt_2))
+DEF_RULE(try_stmt_2, nc, or(2), rule(try_stmt_except_and_more), rule(try_stmt_finally))
+DEF_RULE(try_stmt_except_and_more, nc, and(3), rule(try_stmt_except_list), opt_rule(else_stmt), opt_rule(try_stmt_finally))
+DEF_RULE(try_stmt_except, nc, and(4), tok(KW_EXCEPT), opt_rule(try_stmt_as_name), tok(DEL_COLON), rule(suite))
+DEF_RULE(try_stmt_as_name, nc, and(2), rule(test), opt_rule(as_name))
+DEF_RULE(try_stmt_except_list, nc, one_or_more, rule(try_stmt_except))
+DEF_RULE(try_stmt_finally, nc, and(3), tok(KW_FINALLY), tok(DEL_COLON), rule(suite))
+DEF_RULE(else_stmt, nc, and(3), tok(KW_ELSE), tok(DEL_COLON), rule(suite))
+DEF_RULE(with_stmt, c(with_stmt), and(4), tok(KW_WITH), rule(with_stmt_list), tok(DEL_COLON), rule(suite))
+DEF_RULE(with_stmt_list, nc, list, rule(with_item), tok(DEL_COMMA))
+DEF_RULE(with_item, nc, and(2), rule(test), opt_rule(with_item_as))
+DEF_RULE(with_item_as, nc, and(2), tok(KW_AS), rule(expr))
+DEF_RULE(suite, nc, or(2), rule(suite_block), rule(simple_stmt))
+DEF_RULE(suite_block, nc, and(4), tok(NEWLINE), tok(INDENT), rule(suite_block_stmts), tok(DEDENT))
+DEF_RULE(suite_block_stmts, c(generic_all_nodes), one_or_more, rule(stmt))
+
+// test: or_test ['if' or_test 'else' test] | lambdef
+// test_nocond: or_test | lambdef_nocond
+// lambdef: 'lambda' [varargslist] ':' test
+// lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+
+DEF_RULE(test, nc, or(2), rule(lambdef), rule(test_if_expr))
+DEF_RULE(test_if_expr, c(test_if_expr), and(2), rule(or_test), opt_rule(test_if_else))
+DEF_RULE(test_if_else, nc, and(4), tok(KW_IF), rule(or_test), tok(KW_ELSE), rule(test))
+DEF_RULE(test_nocond, nc, or(2), rule(lambdef_nocond), rule(or_test))
+DEF_RULE(lambdef, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test))
+DEF_RULE(lambdef_nocond, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test_nocond))
+
+// or_test: and_test ('or' and_test)*
+// and_test: not_test ('and' not_test)*
+// not_test: 'not' not_test | comparison
+// comparison: expr (comp_op expr)*
+// comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+// star_expr: '*' expr
+// expr: xor_expr ('|' xor_expr)*
+// xor_expr: and_expr ('^' and_expr)*
+// and_expr: shift_expr ('&' shift_expr)*
+// shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+// arith_expr: term (('+'|'-') term)*
+// term: factor (('*'|'/'|'%'|'//') factor)*
+// factor: ('+'|'-'|'~') factor | power
+// power: atom trailer* ['**' factor]
+
+DEF_RULE(or_test, c(or_test), list, rule(and_test), tok(KW_OR))
+DEF_RULE(and_test, c(and_test), list, rule(not_test), tok(KW_AND))
+DEF_RULE(not_test, nc, or(2), rule(not_test_2), rule(comparison))
+DEF_RULE(not_test_2, c(not_test_2), and(2), tok(KW_NOT), rule(not_test))
+DEF_RULE(comparison, c(comparison), list, rule(expr), rule(comp_op))
+DEF_RULE(comp_op, nc, or(9), tok(OP_LESS), tok(OP_MORE), tok(OP_DBL_EQUAL), tok(OP_LESS_EQUAL), tok(OP_MORE_EQUAL), tok(OP_NOT_EQUAL), tok(KW_IN), rule(comp_op_not_in), rule(comp_op_is))
+DEF_RULE(comp_op_not_in, nc, and(2), tok(KW_NOT), tok(KW_IN))
+DEF_RULE(comp_op_is, nc, and(2), tok(KW_IS), opt_rule(comp_op_is_not))
+DEF_RULE(comp_op_is_not, nc, and(1), tok(KW_NOT))
+DEF_RULE(star_expr, c(star_expr), and(2), tok(OP_STAR), rule(expr))
+DEF_RULE(expr, c(expr), list, rule(xor_expr), tok(OP_PIPE))
+DEF_RULE(xor_expr, c(xor_expr), list, rule(and_expr), tok(OP_CARET))
+DEF_RULE(and_expr, c(and_expr), list, rule(shift_expr), tok(OP_AMPERSAND))
+DEF_RULE(shift_expr, c(shift_expr), list, rule(arith_expr), rule(shift_op))
+DEF_RULE(shift_op, nc, or(2), tok(OP_DBL_LESS), tok(OP_DBL_MORE))
+DEF_RULE(arith_expr, c(arith_expr), list, rule(term), rule(arith_op))
+DEF_RULE(arith_op, nc, or(2), tok(OP_PLUS), tok(OP_MINUS))
+DEF_RULE(term, c(term), list, rule(factor), rule(term_op))
+DEF_RULE(term_op, nc, or(4), tok(OP_STAR), tok(OP_SLASH), tok(OP_PERCENT), tok(OP_DBL_SLASH))
+DEF_RULE(factor, nc, or(2), rule(factor_2), rule(power))
+DEF_RULE(factor_2, c(factor_2), and(2), rule(factor_op), rule(factor))
+DEF_RULE(factor_op, nc, or(3), tok(OP_PLUS), tok(OP_MINUS), tok(OP_TILDE))
+DEF_RULE(power, c(generic_all_nodes), and(3), rule(atom), opt_rule(power_trailers), opt_rule(power_dbl_star))
+DEF_RULE(power_trailers, c(power_trailers), one_or_more, rule(trailer))
+DEF_RULE(power_dbl_star, c(power_dbl_star), and(2), tok(OP_DBL_STAR), rule(factor))
+
+// atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False'
+// testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+// trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+
+DEF_RULE(atom, nc, or(10), tok(NAME), tok(NUMBER), rule(atom_string), tok(ELLIPSES), tok(KW_NONE), tok(KW_TRUE), tok(KW_FALSE), rule(atom_paren), rule(atom_bracket), rule(atom_brace))
+DEF_RULE(atom_string, c(atom_string), one_or_more, rule(string_or_bytes))
+DEF_RULE(string_or_bytes, nc, or(2), tok(STRING), tok(BYTES))
+DEF_RULE(atom_paren, c(atom_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(atom_2b), tok(DEL_PAREN_CLOSE))
+DEF_RULE(atom_2b, nc, or(2), rule(yield_expr), rule(testlist_comp))
+DEF_RULE(atom_bracket, c(atom_bracket), and(3), tok(DEL_BRACKET_OPEN), opt_rule(testlist_comp), tok(DEL_BRACKET_CLOSE))
+DEF_RULE(atom_brace, c(atom_brace), and(3), tok(DEL_BRACE_OPEN), opt_rule(dictorsetmaker), tok(DEL_BRACE_CLOSE))
+DEF_RULE(testlist_comp, nc, and(2), rule(testlist_comp_2), opt_rule(testlist_comp_3))
+DEF_RULE(testlist_comp_2, nc, or(2), rule(star_expr), rule(test))
+DEF_RULE(testlist_comp_3, nc, or(2), rule(comp_for), rule(testlist_comp_3b))
+DEF_RULE(testlist_comp_3b, nc, and(2), tok(DEL_COMMA), opt_rule(testlist_comp_3c))
+DEF_RULE(testlist_comp_3c, nc, list_with_end, rule(testlist_comp_2), tok(DEL_COMMA))
+DEF_RULE(trailer, nc, or(3), rule(trailer_paren), rule(trailer_bracket), rule(trailer_period))
+DEF_RULE(trailer_paren, c(trailer_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE))
+DEF_RULE(trailer_bracket, c(trailer_bracket), and(3), tok(DEL_BRACKET_OPEN), rule(subscriptlist), tok(DEL_BRACKET_CLOSE))
+DEF_RULE(trailer_period, c(trailer_period), and(2), tok(DEL_PERIOD), tok(NAME))
+
+// subscriptlist: subscript (',' subscript)* [',']
+// subscript: test | [test] ':' [test] [sliceop]
+// sliceop: ':' [test]
+
+DEF_RULE(subscriptlist, c(generic_tuple), list_with_end, rule(subscript), tok(DEL_COMMA))
+DEF_RULE(subscript, nc, or(2), rule(subscript_3), rule(subscript_2))
+DEF_RULE(subscript_2, c(subscript_2), and(2), rule(test), opt_rule(subscript_3))
+DEF_RULE(subscript_3, c(subscript_3), and(2), tok(DEL_COLON), opt_rule(subscript_3b))
+DEF_RULE(subscript_3b, nc, or(2), rule(subscript_3c), rule(subscript_3d))
+DEF_RULE(subscript_3c, nc, and(2), tok(DEL_COLON), opt_rule(test))
+DEF_RULE(subscript_3d, nc, and(2), rule(test), opt_rule(sliceop))
+DEF_RULE(sliceop, nc, and(2), tok(DEL_COLON), opt_rule(test))
+
+// exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+// testlist: test (',' test)* [',']
+// dictorsetmaker: (test ':' test (comp_for | (',' test ':' test)* [','])) | (test (comp_for | (',' test)* [',']))
+
+DEF_RULE(exprlist, nc, list_with_end, rule(exprlist_2), tok(DEL_COMMA))
+DEF_RULE(exprlist_2, nc, or(2), rule(star_expr), rule(expr))
+DEF_RULE(testlist, c(generic_tuple), list_with_end, rule(test), tok(DEL_COMMA))
+// TODO dictorsetmaker lets through more than is allowed
+DEF_RULE(dictorsetmaker, nc, and(2), rule(dictorsetmaker_item), opt_rule(dictorsetmaker_tail))
+DEF_RULE(dictorsetmaker_item, c(dictorsetmaker_item), and(2), rule(test), opt_rule(dictorsetmaker_colon))
+DEF_RULE(dictorsetmaker_colon, nc, and(2), tok(DEL_COLON), rule(test))
+DEF_RULE(dictorsetmaker_tail, nc, or(2), rule(comp_for), rule(dictorsetmaker_list))
+DEF_RULE(dictorsetmaker_list, nc, and(2), tok(DEL_COMMA), opt_rule(dictorsetmaker_list2))
+DEF_RULE(dictorsetmaker_list2, nc, list_with_end, rule(dictorsetmaker_item), tok(DEL_COMMA))
+
+// classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+DEF_RULE(classdef, c(classdef), and(5), tok(KW_CLASS), tok(NAME), opt_rule(classdef_2), tok(DEL_COLON), rule(suite))
+DEF_RULE(classdef_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE))
+
+// arglist: (argument ',')* (argument [','] | '*' test (',' argument)* [',' '**' test] | '**' test)
+
+// TODO arglist lets through more than is allowed, compiler needs to do further verification
+DEF_RULE(arglist, c(generic_all_nodes), list_with_end, rule(arglist_2), tok(DEL_COMMA))
+DEF_RULE(arglist_2, nc, or(3), rule(arglist_star), rule(arglist_dbl_star), rule(argument))
+DEF_RULE(arglist_star, c(arglist_star), and(2), tok(OP_STAR), rule(test))
+DEF_RULE(arglist_dbl_star, c(arglist_dbl_star), and(2), tok(OP_DBL_STAR), rule(test))
+
+// # The reason that keywords are test nodes instead of NAME is that using NAME
+// # results in an ambiguity. ast.c makes sure it's a NAME.
+// argument: test [comp_for] | test '=' test  # Really [keyword '='] test
+// comp_iter: comp_for | comp_if
+// comp_for: 'for' exprlist 'in' or_test [comp_iter]
+// comp_if: 'if' test_nocond [comp_iter]
+
+DEF_RULE(argument, c(argument), and(2), rule(test), opt_rule(argument_2))
+DEF_RULE(argument_2, nc, or(2), rule(comp_for), rule(argument_3))
+DEF_RULE(argument_3, nc, and(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE(comp_iter, nc, or(2), rule(comp_for), rule(comp_if))
+DEF_RULE(comp_for, nc, and(5), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(or_test), opt_rule(comp_iter))
+DEF_RULE(comp_if, nc, and(3), tok(KW_IF), rule(test_nocond), opt_rule(comp_iter))
+
+// # not used in grammar, but may appear in "node" passed from Parser to Compiler
+// encoding_decl: NAME
+
+// yield_expr: 'yield' [yield_arg]
+// yield_arg: 'from' test | testlist
+
+DEF_RULE(yield_expr, c(yield_expr), and(2), tok(KW_YIELD), opt_rule(yield_arg))
+DEF_RULE(yield_arg, nc, or(2), rule(yield_arg_from), rule(testlist))
+DEF_RULE(yield_arg_from, nc, and(2), tok(KW_FROM), rule(test))
diff --git a/py/lexer.c b/py/lexer.c
new file mode 100644
index 0000000..9c2195e
--- /dev/null
+++ b/py/lexer.c
@@ -0,0 +1,677 @@
+/* lexer.c -- simple tokeniser for Python implementation
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+
+#define TAB_SIZE (8)
+#define CHR_EOF (-1)
+
+struct _py_lexer_t {
+    const char *name;           // (file) name of source
+    bool free;                  // free source when done with it
+
+    const char *src_beg;        // beginning of source
+    const char *src_cur;        // current location in source; points to chr0
+    const char *src_end;        // end (exclusive) of source
+    unichar chr0, chr1, chr2;   // lookahead window: chr0 is the current character, chr1 and chr2 the two that follow
+
+    uint line;                  // source line; incremented by next_char at each line break
+    uint column;                // source column, 1-based; next_char resets it to 1 at each line break
+
+    uint cont_line;             // continued line; set to line whenever next_char consumes a line break
+
+    int emit_dent;              // pending indentation tokens: > 0 INDENTs, < 0 DEDENTs still to be emitted
+    int nested_bracket_level;   // NEWLINE tokens are only produced while this is 0
+
+    uint alloc_indent_level;    // allocated capacity of indent_level, in entries
+    uint num_indent_level;      // number of entries of indent_level in use
+    uint16_t *indent_level;     // stack of indentation widths (in columns); the last used entry is the current one
+
+    py_token_t tok_cur;         // presumably the current token -- confirm against the token-advance code
+    py_token_t tok_next;        // presumably the one-token lookahead -- confirm against the token-advance code
+};
+
+static bool py_token_is_str(const py_token_t *tok, const char *str) {
+    // true iff the token's text (tok->len bytes at tok->str) is exactly the NUL-terminated string str
+    uint n_same = 0;
+    for (; n_same < tok->len; n_same++) {
+        if (tok->str[n_same] != str[n_same]) {
+            break;
+        }
+    }
+
+    // a match needs every token byte consumed and str to end at that same position
+    return n_same == tok->len && str[n_same] == 0;
+}
+
+void py_token_show(const py_token_t *tok) {
+    // debugging aid: print the token's location, kind and extent, then its text, all on one line
+    printf("(%s:%d:%d) kind:%d cont_line:%d str:%p len:%d", tok->src_name, tok->src_line, tok->src_column, tok->kind, tok->cont_line, tok->str, tok->len);
+    if (tok->str != NULL && tok->len > 0) {
+        const char *text_end = tok->str + tok->len;
+        printf(" ");
+        // walk the text one decoded character at a time, masking unprintable ones with '?'
+        for (const char *p = tok->str; p < text_end; p = g_utf8_next_char(p)) {
+            unichar c = g_utf8_get_char(p);
+            if (g_unichar_isprint(c)) {
+                printf("%c", c);
+            } else {
+                printf("?");
+            }
+        }
+    }
+    printf("\n");
+}
+
+void py_token_show_error_prefix(const py_token_t *tok) { // print "(name:line:column) " for tok, with no trailing newline
+    printf("(%s:%d:%d) ", tok->src_name, tok->src_line, tok->src_column);
+}
+
+bool py_token_show_error(const py_token_t *tok, const char *msg) { // print msg on its own line, prefixed with tok's source location
+    printf("(%s:%d:%d) %s\n", tok->src_name, tok->src_line, tok->src_column, msg);
+    return false; // unconditional: this function never returns true
+}
+
+static bool is_end(py_lexer_t *lex) { // true once the current character is the CHR_EOF marker
+    return lex->chr0 == CHR_EOF;
+}
+
+static bool is_physical_newline(py_lexer_t *lex) { // true if the current character is LF or CR
+    return lex->chr0 == '\n' || lex->chr0 == '\r';
+}
+
+static bool is_char(py_lexer_t *lex, char c) { // true if the current character equals c
+    return lex->chr0 == c;
+}
+
+static bool is_char_or(py_lexer_t *lex, char c1, char c2) { // true if the current character equals c1 or c2
+    return lex->chr0 == c1 || lex->chr0 == c2;
+}
+
+static bool is_char_or3(py_lexer_t *lex, char c1, char c2, char c3) { // true if the current character equals c1, c2 or c3
+    return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3;
+}
+
+/*
+static bool is_char_following(py_lexer_t *lex, char c) {
+    return lex->chr1 == c;
+}
+*/
+
+static bool is_char_following_or(py_lexer_t *lex, char c1, char c2) { // true if the character after the current one (chr1) equals c1 or c2
+    return lex->chr1 == c1 || lex->chr1 == c2;
+}
+
+static bool is_char_following_following_or(py_lexer_t *lex, char c1, char c2) { // true if the character two past the current one (chr2) equals c1 or c2
+    return lex->chr2 == c1 || lex->chr2 == c2;
+}
+
+static bool is_char_and(py_lexer_t *lex, char c1, char c2) { // true if the current character is c1 and the one after it is c2
+    return lex->chr0 == c1 && lex->chr1 == c2;
+}
+
+static bool is_whitespace(py_lexer_t *lex) { // whitespace test on the current character, as decided by g_unichar_isspace
+    return g_unichar_isspace(lex->chr0);
+}
+
+static bool is_letter(py_lexer_t *lex) { // alphabetic test on the current character, as decided by g_unichar_isalpha
+    return g_unichar_isalpha(lex->chr0);
+}
+
+static bool is_digit(py_lexer_t *lex) { // digit test on the current character, as decided by g_unichar_isdigit
+    return g_unichar_isdigit(lex->chr0);
+}
+
+static bool is_following_digit(py_lexer_t *lex) { // digit test on the character after the current one (chr1)
+    return g_unichar_isdigit(lex->chr1);
+}
+
+// TODO UNICODE include unicode characters in definition of identifiers
+static bool is_head_of_identifier(py_lexer_t *lex) { // true if the current character may start an identifier: a letter (per is_letter) or '_'
+    return is_letter(lex) || lex->chr0 == '_';
+}
+
+// TODO UNICODE include unicode characters in definition of identifiers
+static bool is_tail_of_identifier(py_lexer_t *lex) { // true if the current character may continue an identifier: an identifier head or a digit
+    return is_head_of_identifier(lex) || is_digit(lex);
+}
+
+static void next_char(py_lexer_t *lex) { // consume the current character: update line/column, then slide the chr0..chr2 window forward
+    if (lex->chr0 == CHR_EOF) {
+        return; // already at end of input: nothing to consume
+    }
+
+    int advance = 1; // how many source positions to step over (2 for a CR LF pair)
+
+    if (lex->chr0 == '\n') {
+        // LF is a new line
+        ++lex->line;
+        lex->column = 1;
+        lex->cont_line = lex->line;
+    } else if (lex->chr0 == '\r') {
+        // CR is a new line
+        ++lex->line;
+        lex->column = 1;
+        lex->cont_line = lex->line;
+        if (lex->chr1 == '\n') {
+            // CR LF is a single new line
+            advance = 2;
+        }
+    } else if (lex->chr0 == '\t') {
+        // a tab: move to the next tab stop (columns are 1-based, stops are TAB_SIZE apart)
+        lex->column = (((lex->column - 1 + TAB_SIZE) / TAB_SIZE) * TAB_SIZE) + 1;
+    } else {
+        // a character worth one column
+        ++lex->column;
+    }
+
+    for (; advance > 0; advance--) {
+        lex->chr0 = lex->chr1; // shift the window left by one position
+        lex->chr1 = lex->chr2;
+        lex->src_cur++; // one byte per step: multi-byte UTF-8 sequences are not decoded here
+        if (lex->src_cur + 2 < lex->src_end) {
+            lex->chr2 = lex->src_cur[2];
+        } else {
+            // EOF: no source byte is left for chr2, so pad the window instead
+            if (lex->chr1 != '\n' && lex->chr1 != '\r') {
+                lex->chr2 = '\n'; // insert newline at end of file
+            } else {
+                lex->chr2 = CHR_EOF;
+            }
+        }
+    }
+}
+
+void indent_push(py_lexer_t *lex, uint indent) { // push indent onto the indentation stack, growing the stack when it is full
+    if (lex->alloc_indent_level <= lex->num_indent_level) {
+        lex->alloc_indent_level = 2 * lex->alloc_indent_level; // no free slot: double the capacity
+        lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level);
+    }
+    lex->indent_level[lex->num_indent_level++] = indent; // stored in a uint16_t slot
+}
+
+uint indent_top(py_lexer_t *lex) { // return the most recently pushed indentation width
+    return lex->indent_level[lex->num_indent_level - 1]; // no emptiness check: requires num_indent_level >= 1
+}
+
+void indent_pop(py_lexer_t *lex) { // discard the top entry of the indentation stack
+    lex->num_indent_level -= 1; // only the count changes; no underflow check is made
+}
+
+// some tricky operator encoding:
+//     <op>  = begin with <op>, if this opchar matches then begin here
+//     e<op> = end with <op>, if this opchar matches then end
+//     E<op> = mandatory end with <op>, this opchar must match, then end
+//     c<op> = continue with <op>, if this opchar matches then continue matching
+// this means if the start of two ops are the same then they are equal til the last char
+
+static const char *tok_enc = // operator and delimiter spellings; tok_enc_kind lists one token kind per operator spelled here, in the same order
+    "()[]{},:;@~" // singles
+    "<e=c<e="     // < <= << <<=
+    ">e=c>e="     // > >= >> >>=
+    "*e=c*e="     // * *= ** **=
+    "+e="         // + +=
+    "-e=e>"       // - -= ->
+    "&e="         // & &=
+    "|e="         // | |=
+    "/e=c/e="     // / /= // //=
+    "%e="         // % %=
+    "^e="         // ^ ^=
+    "=e="         // = ==
+    "!E="         // !=
+    ".c.E.";      // . ...
+
+// TODO static assert that number of tokens is less than 256 so we can safely make this table with byte sized entries
+static const uint8_t tok_enc_kind[] = { // token kind for each operator encoded in tok_enc, listed in the same order (one row per tok_enc line)
+    PY_TOKEN_DEL_PAREN_OPEN, PY_TOKEN_DEL_PAREN_CLOSE,
+    PY_TOKEN_DEL_BRACKET_OPEN, PY_TOKEN_DEL_BRACKET_CLOSE,
+    PY_TOKEN_DEL_BRACE_OPEN, PY_TOKEN_DEL_BRACE_CLOSE,
+    PY_TOKEN_DEL_COMMA, PY_TOKEN_DEL_COLON, PY_TOKEN_DEL_SEMICOLON, PY_TOKEN_DEL_AT, PY_TOKEN_OP_TILDE,
+
+    PY_TOKEN_OP_LESS, PY_TOKEN_OP_LESS_EQUAL, PY_TOKEN_OP_DBL_LESS, PY_TOKEN_DEL_DBL_LESS_EQUAL,
+    PY_TOKEN_OP_MORE, PY_TOKEN_OP_MORE_EQUAL, PY_TOKEN_OP_DBL_MORE, PY_TOKEN_DEL_DBL_MORE_EQUAL,
+    PY_TOKEN_OP_STAR, PY_TOKEN_DEL_STAR_EQUAL, PY_TOKEN_OP_DBL_STAR, PY_TOKEN_DEL_DBL_STAR_EQUAL,
+    PY_TOKEN_OP_PLUS, PY_TOKEN_DEL_PLUS_EQUAL,
+    PY_TOKEN_OP_MINUS, PY_TOKEN_DEL_MINUS_EQUAL, PY_TOKEN_DEL_MINUS_MORE,
+    PY_TOKEN_OP_AMPERSAND, PY_TOKEN_DEL_AMPERSAND_EQUAL,
+    PY_TOKEN_OP_PIPE, PY_TOKEN_DEL_PIPE_EQUAL,
+    PY_TOKEN_OP_SLASH, PY_TOKEN_DEL_SLASH_EQUAL, PY_TOKEN_OP_DBL_SLASH, PY_TOKEN_DEL_DBL_SLASH_EQUAL,
+    PY_TOKEN_OP_PERCENT, PY_TOKEN_DEL_PERCENT_EQUAL,
+    PY_TOKEN_OP_CARET, PY_TOKEN_DEL_CARET_EQUAL,
+    PY_TOKEN_DEL_EQUAL, PY_TOKEN_OP_DBL_EQUAL,
+    PY_TOKEN_OP_NOT_EQUAL,
+    PY_TOKEN_DEL_PERIOD, PY_TOKEN_ELLIPSES,
+};
+
+// must have the same order as enum in lexer.h
+static const char *tok_kw[] = { // keyword spellings; an entry's index identifies the keyword, so the order is significant
+    "False",
+    "None",
+    "True",
+    "and",
+    "as",
+    "assert",
+    "break",
+    "class",
+    "continue",
+    "def",
+    "del",
+    "elif",
+    "else",
+    "except",
+    "finally",
+    "for",
+    "from",
+    "global",
+    "if",
+    "import",
+    "in",
+    "is",
+    "lambda",
+    "nonlocal",
+    "not",
+    "or",
+    "pass",
+    "raise",
+    "return",
+    "try",
+    "while",
+    "with",
+    "yield",
+    NULL, // NULL entry ends the table
+};
+
+static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
+    bool had_physical_newline = false;
+
+    while (!is_end(lex)) {
+        if (is_physical_newline(lex)) {
+            had_physical_newline = true;
+            next_char(lex);
+        } else if (is_whitespace(lex)) {
+            next_char(lex);
+        } else if (is_char(lex, '#')) {
+            next_char(lex);
+            while (!is_end(lex) && !is_physical_newline(lex)) {
+                next_char(lex);
+            }
+            // had_physical_newline will be set on next loop
+        } else if (is_char(lex, '\\')) {
+            // backslash (outside string literals) must appear just before a physical newline
+            next_char(lex);
+            if (!is_physical_newline(lex)) {
+                // TODO SyntaxError
+                assert(0);
+            } else {
+                next_char(lex);
+            }
+        } else {
+            break;
+        }
+    }
+
+    tok->src_name = lex->name;
+    tok->src_line = lex->line;
+    tok->src_column = lex->column;
+    tok->kind = PY_TOKEN_INVALID;
+    tok->cont_line = lex->cont_line;
+    tok->str = lex->src_cur;
+    tok->len = 0;
+
+    if (lex->emit_dent < 0) {
+        tok->kind = PY_TOKEN_DEDENT;
+        lex->emit_dent += 1;
+
+    } else if (lex->emit_dent > 0) {
+        tok->kind = PY_TOKEN_INDENT;
+        lex->emit_dent -= 1;
+
+    } else if (had_physical_newline && lex->nested_bracket_level == 0
+                   && tok != &lex->tok_cur // so that we don't emit a newline if file starts with a comment
+               ) {
+        tok->kind = PY_TOKEN_NEWLINE;
+
+        uint num_spaces = lex->column - 1;
+        lex->emit_dent = 0;
+        if (num_spaces == indent_top(lex)) {
+        } else if (num_spaces > indent_top(lex)) {
+            indent_push(lex, num_spaces);
+            lex->emit_dent += 1;
+        } else {
+            while (num_spaces < indent_top(lex)) {
+                indent_pop(lex);
+                lex->emit_dent -= 1;
+            }
+            if (num_spaces != indent_top(lex)) {
+                //SyntaxError
+            }
+        }
+
+    } else if (is_end(lex)) {
+        // TODO emit a newline if file does not end in one
+        if (indent_top(lex) > 0) {
+            tok->kind = PY_TOKEN_NEWLINE;
+            lex->emit_dent = 0;
+            while (indent_top(lex) > 0) {
+                indent_pop(lex);
+                lex->emit_dent -= 1;
+            }
+        } else {
+            tok->kind = PY_TOKEN_END;
+        }
+
+    } else if (is_char_or(lex, '\'', '\"')
+               || (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"'))
+               || ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r')) && is_char_following_following_or(lex, '\'', '\"'))) {
+        // a string or bytes literal
+
+        // parse type codes
+        bool is_raw = false;
+        bool is_bytes = false;
+        if (is_char(lex, 'u')) {
+            next_char(lex);
+        } else if (is_char(lex, 'b')) {
+            is_bytes = true;
+            next_char(lex);
+            if (is_char(lex, 'r')) {
+                is_raw = true;
+                next_char(lex);
+            }
+        } else if (is_char(lex, 'r')) {
+            is_raw = true;
+            next_char(lex);
+            if (is_char(lex, 'b')) {
+                is_bytes = true;
+                next_char(lex);
+            }
+        }
+
+        // set token kind
+        if (is_bytes) {
+            tok->kind = PY_TOKEN_BYTES;
+        } else {
+            tok->kind = PY_TOKEN_STRING;
+        }
+
+        // get first quoting character
+        char quote_char = '\'';
+        if (is_char(lex, '\"')) {
+            quote_char = '\"';
+        }
+        next_char(lex);
+
+        // work out if it's a single or triple quoted literal
+        int num_quotes;
+        if (is_char_and(lex, quote_char, quote_char)) {
+            // triple quotes
+            next_char(lex);
+            next_char(lex);
+            num_quotes = 3;
+        } else {
+            // single quotes
+            num_quotes = 1;
+        }
+
+        // set start of token
+        tok->str = lex->src_cur;
+
+        // parse the literal
+        // TODO proper escaping
+        int n_closing = 0;
+        while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
+            if (is_char(lex, quote_char)) {
+                n_closing += 1;
+            } else {
+                n_closing = 0;
+                if (!is_raw && is_char(lex, '\\')) {
+                    next_char(lex);
+                }
+            }
+            next_char(lex);
+        }
+
+        // check we got the required end quotes
+        if (n_closing < num_quotes) {
+            tok->kind = PY_TOKEN_LONELY_STRING_OPEN;
+        }
+
+        // set token string (byte) length
+        tok->len = lex->src_cur - tok->str - n_closing;
+
+        // we set the length, return now so it's not set incorrectly below
+        return;
+
+    } else if (is_head_of_identifier(lex)) {
+        tok->kind = PY_TOKEN_NAME;
+
+        next_char(lex);
+
+        while (!is_end(lex) && is_tail_of_identifier(lex)) {
+            next_char(lex);
+        }
+
+    } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
+        tok->kind = PY_TOKEN_NUMBER;
+
+        next_char(lex);
+
+        while (!is_end(lex)) {
+            if (is_char_or(lex, 'e', 'E')) {
+                next_char(lex);
+                if (is_char(lex, '+') || is_char(lex, '-')) {
+                    next_char(lex);
+                }
+            } else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) {
+                next_char(lex);
+            } else {
+                break;
+            }
+        }
+
+    } else {
+        // search for encoded delimiter or operator
+
+        const char *t = tok_enc;
+        uint tok_enc_index = 0;
+        for (; *t != 0 && !is_char(lex, *t); t += 1) {
+            if (*t == 'e' || *t == 'c') {
+                t += 1;
+            } else if (*t == 'E') {
+                tok_enc_index -= 1;
+                t += 1;
+            }
+            tok_enc_index += 1;
+        }
+
+        next_char(lex);
+
+        if (*t == 0) {
+            // didn't match any delimiter or operator characters
+            tok->kind = PY_TOKEN_INVALID;
+
+        } else {
+            // matched a delimiter or operator character
+
+            // get the maximum characters for a valid token
+            t += 1;
+            uint t_index = tok_enc_index;
+            for (;;) {
+                for (; *t == 'e'; t += 1) {
+                    t += 1;
+                    t_index += 1;
+                    if (is_char(lex, *t)) {
+                        next_char(lex);
+                        tok_enc_index = t_index;
+                        break;
+                    }
+                }
+
+                if (*t == 'E') {
+                    t += 1;
+                    if (is_char(lex, *t)) {
+                        next_char(lex);
+                        tok_enc_index = t_index;
+                    } else {
+                        tok->kind = PY_TOKEN_INVALID;
+                    }
+                    break;
+                }
+
+                if (*t == 'c') {
+                    t += 1;
+                    t_index += 1;
+                    if (is_char(lex, *t)) {
+                        next_char(lex);
+                        tok_enc_index = t_index;
+                        t += 1;
+                    } else {
+                        break;
+                    }
+                } else {
+                    break;
+                }
+            }
+
+            // set token kind
+            tok->kind = tok_enc_kind[tok_enc_index];
+
+            // compute bracket level for implicit line joining
+            if (tok->kind == PY_TOKEN_DEL_PAREN_OPEN || tok->kind == PY_TOKEN_DEL_BRACKET_OPEN || tok->kind == PY_TOKEN_DEL_BRACE_OPEN) {
+                lex->nested_bracket_level += 1;
+            } else if (tok->kind == PY_TOKEN_DEL_PAREN_CLOSE || tok->kind == PY_TOKEN_DEL_BRACKET_CLOSE || tok->kind == PY_TOKEN_DEL_BRACE_CLOSE) {
+                lex->nested_bracket_level -= 1;
+            }
+        }
+    }
+
+    // set token string (byte) length
+    tok->len = lex->src_cur - tok->str;
+
+    // check for keywords (must be done after setting token string length)
+    if (tok->kind == PY_TOKEN_NAME) {
+        for (int i = 0; tok_kw[i] != NULL; i++) {
+            if (py_token_is_str(tok, tok_kw[i])) {
+                tok->kind = PY_TOKEN_KW_FALSE + i;
+                break;
+            }
+        }
+    }
+}
+
+py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str) {
+    py_lexer_t *lex = m_new(py_lexer_t, 1);
+    // lexer over str[0..len); src_name is stored as given, not copied
+    //lex->name = g_strdup(src_name); // TODO
+    lex->name = src_name;
+    lex->free = free_str;
+    lex->src_beg = str;
+    lex->src_cur = str;
+    lex->src_end = str + len;
+    lex->line = 1;
+    lex->column = 1;
+    lex->cont_line = lex->line;
+    lex->emit_dent = 0;
+    lex->nested_bracket_level = 0;
+    lex->alloc_indent_level = 16;
+    lex->num_indent_level = 1;
+    lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
+    lex->indent_level[0] = 0;
+
+    // preload the three lookahead characters
+    // TODO unicode
+    switch (len) {
+        case 0:
+            lex->chr0 = '\n'; // insert newline at end of file
+            lex->chr1 = CHR_EOF;
+            lex->chr2 = CHR_EOF;
+            break;
+        case 1:
+            lex->chr0 = str[0];
+            lex->chr1 = CHR_EOF;
+            if (lex->chr0 != '\n' && lex->chr0 != '\r') {
+                lex->chr1 = '\n'; // insert newline at end of file
+            }
+            lex->chr2 = CHR_EOF;
+            break;
+        case 2:
+            lex->chr0 = str[0];
+            lex->chr1 = str[1];
+            lex->chr2 = CHR_EOF;
+            if (lex->chr1 != '\n' && lex->chr1 != '\r') {
+                lex->chr2 = '\n'; // insert newline at end of file
+            }
+            break;
+        default:
+            lex->chr0 = str[0];
+            lex->chr1 = str[1];
+            lex->chr2 = str[2];
+            break;
+    }
+
+    py_lexer_next_token_into(lex, &lex->tok_cur);
+    py_lexer_next_token_into(lex, &lex->tok_next);
+    return lex;
+}
+
+void py_lexer_free(py_lexer_t *lex) {
+    if (lex == NULL) { // freeing a NULL lexer is a no-op
+        return;
+    }
+    m_free(lex->indent_level); // indent stack allocated by py_lexer_from_str_len; lex->name is borrowed, not freed
+    if (lex->free) { // the source buffer is released only when the lexer was told to own it
+        m_free((char*)lex->src_beg);
+    }
+    m_free(lex);
+}
+
+void py_lexer_to_next(py_lexer_t *lex) { // advance the lexer by one token
+    lex->tok_cur = lex->tok_next; // the buffered lookahead token becomes the current one
+    py_lexer_next_token_into(lex, &lex->tok_next); // then the lookahead slot is refilled from the source
+}
+
+const py_token_t *py_lexer_cur(const py_lexer_t *lex) {
+    return &lex->tok_cur; // points into the lexer itself; py_lexer_to_next overwrites it
+}
+
+bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind) {
+    return lex->tok_cur.kind == kind; // tests the current token only, without consuming it
+}
+
+/*
+bool py_lexer_is_str(py_lexer_t *lex, const char *str) {
+    return py_token_is_str(&lex->tok_cur, str);
+}
+
+bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind) {
+    return lex->tok_next.kind == kind;
+}
+
+bool py_lexer_is_next_str(py_lexer_t *lex, const char *str) {
+    return py_token_is_str(&lex->tok_next, str);
+}
+
+bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind) {
+    if (py_lexer_is_kind(lex, kind)) {
+        py_lexer_to_next(lex);
+        return true;
+    }
+    return false;
+}
+
+bool py_lexer_opt_str(py_lexer_t *lex, const char *str) {
+    if (py_lexer_is_str(lex, str)) {
+        py_lexer_to_next(lex);
+        return true;
+    }
+    return false;
+}
+*/
+
+bool py_lexer_show_error(py_lexer_t *lex, const char *msg) {
+    return py_token_show_error(&lex->tok_cur, msg); // reports msg against the current token; the result is passed through
+}
diff --git a/py/lexer.h b/py/lexer.h
new file mode 100644
index 0000000..32ab48a
--- /dev/null
+++ b/py/lexer.h
@@ -0,0 +1,141 @@
+/* lexer.h -- simple tokeniser for Python implementation
+ */
+
+#ifndef INCLUDED_LEXER_H
+#define INCLUDED_LEXER_H
+
+/* uses (byte) length instead of null termination
+ * tokens are the same - UTF-8 with (byte) length
+ */
+
+typedef enum _py_token_kind_t {
+    PY_TOKEN_END,                   // 0; end of input
+
+    PY_TOKEN_INVALID,               // no token could be recognised
+    PY_TOKEN_LONELY_STRING_OPEN,    // string/bytes literal whose closing quote(s) are missing
+
+    PY_TOKEN_NEWLINE,               // 3; line end (not emitted inside brackets)
+    PY_TOKEN_INDENT,                // 4
+    PY_TOKEN_DEDENT,                // 5
+
+    PY_TOKEN_NAME,                  // 6
+    PY_TOKEN_NUMBER,
+    PY_TOKEN_STRING,
+    PY_TOKEN_BYTES,
+
+    PY_TOKEN_ELLIPSES,
+
+    PY_TOKEN_KW_FALSE,              // 11; the lexer computes keyword kinds as PY_TOKEN_KW_FALSE + index into its keyword table, so keep this order
+    PY_TOKEN_KW_NONE,
+    PY_TOKEN_KW_TRUE,
+    PY_TOKEN_KW_AND,
+    PY_TOKEN_KW_AS,
+    PY_TOKEN_KW_ASSERT,
+    PY_TOKEN_KW_BREAK,
+    PY_TOKEN_KW_CLASS,
+    PY_TOKEN_KW_CONTINUE,
+    PY_TOKEN_KW_DEF,                // 20
+    PY_TOKEN_KW_DEL,
+    PY_TOKEN_KW_ELIF,
+    PY_TOKEN_KW_ELSE,
+    PY_TOKEN_KW_EXCEPT,
+    PY_TOKEN_KW_FINALLY,
+    PY_TOKEN_KW_FOR,
+    PY_TOKEN_KW_FROM,
+    PY_TOKEN_KW_GLOBAL,
+    PY_TOKEN_KW_IF,
+    PY_TOKEN_KW_IMPORT,             // 30
+    PY_TOKEN_KW_IN,
+    PY_TOKEN_KW_IS,
+    PY_TOKEN_KW_LAMBDA,
+    PY_TOKEN_KW_NONLOCAL,
+    PY_TOKEN_KW_NOT,
+    PY_TOKEN_KW_OR,
+    PY_TOKEN_KW_PASS,
+    PY_TOKEN_KW_RAISE,
+    PY_TOKEN_KW_RETURN,
+    PY_TOKEN_KW_TRY,                // 40
+    PY_TOKEN_KW_WHILE,
+    PY_TOKEN_KW_WITH,
+    PY_TOKEN_KW_YIELD,
+
+    PY_TOKEN_OP_PLUS,               // 44; operator and delimiter kinds are looked up by the lexer from its encoded table
+    PY_TOKEN_OP_MINUS,
+    PY_TOKEN_OP_STAR,
+    PY_TOKEN_OP_DBL_STAR,
+    PY_TOKEN_OP_SLASH,
+    PY_TOKEN_OP_DBL_SLASH,
+    PY_TOKEN_OP_PERCENT,
+    PY_TOKEN_OP_LESS,
+    PY_TOKEN_OP_DBL_LESS,
+    PY_TOKEN_OP_MORE,
+    PY_TOKEN_OP_DBL_MORE,           // 54
+    PY_TOKEN_OP_AMPERSAND,
+    PY_TOKEN_OP_PIPE,
+    PY_TOKEN_OP_CARET,
+    PY_TOKEN_OP_TILDE,
+    PY_TOKEN_OP_LESS_EQUAL,
+    PY_TOKEN_OP_MORE_EQUAL,
+    PY_TOKEN_OP_DBL_EQUAL,
+    PY_TOKEN_OP_NOT_EQUAL,
+
+    PY_TOKEN_DEL_PAREN_OPEN,        // 63; the open/close bracket kinds drive the lexer's nesting level
+    PY_TOKEN_DEL_PAREN_CLOSE,
+    PY_TOKEN_DEL_BRACKET_OPEN,
+    PY_TOKEN_DEL_BRACKET_CLOSE,
+    PY_TOKEN_DEL_BRACE_OPEN,
+    PY_TOKEN_DEL_BRACE_CLOSE,
+    PY_TOKEN_DEL_COMMA,
+    PY_TOKEN_DEL_COLON,
+    PY_TOKEN_DEL_PERIOD,
+    PY_TOKEN_DEL_SEMICOLON,
+    PY_TOKEN_DEL_AT,                // 73
+    PY_TOKEN_DEL_EQUAL,
+    PY_TOKEN_DEL_PLUS_EQUAL,
+    PY_TOKEN_DEL_MINUS_EQUAL,
+    PY_TOKEN_DEL_STAR_EQUAL,
+    PY_TOKEN_DEL_SLASH_EQUAL,
+    PY_TOKEN_DEL_DBL_SLASH_EQUAL,
+    PY_TOKEN_DEL_PERCENT_EQUAL,
+    PY_TOKEN_DEL_AMPERSAND_EQUAL,
+    PY_TOKEN_DEL_PIPE_EQUAL,
+    PY_TOKEN_DEL_CARET_EQUAL,       // 83
+    PY_TOKEN_DEL_DBL_MORE_EQUAL,
+    PY_TOKEN_DEL_DBL_LESS_EQUAL,
+    PY_TOKEN_DEL_DBL_STAR_EQUAL,
+    PY_TOKEN_DEL_MINUS_MORE,
+} py_token_kind_t;
+
+typedef struct _py_token_t {
+    const char *src_name;       // (file) name of source
+    uint src_line;              // actual source line
+    uint src_column;            // actual source column
+
+    py_token_kind_t kind;       // kind of token
+    uint cont_line;             // token belongs to this line in a continued line
+    const char *str;            // start of the token text (not null terminated, use len); for string/bytes literals, the text inside the quotes
+    uint len;                   // (byte) length of string of token
+} py_token_t;
+
+typedef struct _py_lexer_t py_lexer_t;
+
+void py_token_show(const py_token_t *tok);
+void py_token_show_error_prefix(const py_token_t *tok);
+bool py_token_show_error(const py_token_t *tok, const char *msg);
+
+py_lexer_t *py_lexer_from_file(const char *filename);
+py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str);
+void py_lexer_free(py_lexer_t *lex);
+void py_lexer_to_next(py_lexer_t *lex);
+const py_token_t *py_lexer_cur(const py_lexer_t *lex);
+bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
+/* unused
+bool py_lexer_is_str(py_lexer_t *lex, const char *str);
+bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind);
+bool py_lexer_is_next_str(py_lexer_t *lex, const char *str);
+bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
+bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
+*/
+bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
+
+#endif /* INCLUDED_LEXER_H */
diff --git a/py/lexerfile.c b/py/lexerfile.c
new file mode 100644
index 0000000..74bb5a0
--- /dev/null
+++ b/py/lexerfile.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "misc.h"
+#include "lexer.h"
+
+// Create a lexer over the whole contents of the named file.
+// Returns NULL, after printing a message for open/read failures, when the file cannot be loaded.
+// The contents are read into a heap buffer which the returned lexer is told to free.
+py_lexer_t *py_lexer_from_file(const char *filename) {
+    // TODO abstract away file functionality
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        printf("cannot open file %s\n", filename);
+        return NULL;
+    }
+
+    // find the file size; lseek returns -1 on inputs that cannot be seeked (e.g. pipes)
+    off_t end = lseek(fd, 0, SEEK_END);
+    if (end < 0 || lseek(fd, 0, SEEK_SET) < 0) {
+        printf("cannot read file %s\n", filename);
+        close(fd);
+        return NULL;
+    }
+    uint size = (uint)end;
+
+    // m_new returns NULL for a zero-byte request, so NULL is only an error when size > 0
+    char *data = m_new(char, size);
+    if (data == NULL && size > 0) {
+        close(fd);
+        return NULL;
+    }
+
+    // read may deliver fewer bytes than requested, so loop until the buffer is full
+    uint num_read = 0;
+    while (num_read < size) {
+        ssize_t n = read(fd, data + num_read, size - num_read);
+        if (n < 0) {
+            printf("cannot read file %s\n", filename);
+            m_free(data);
+            close(fd);
+            return NULL;
+        }
+        if (n == 0) {
+            break; // file is shorter than lseek reported; lex what was read
+        }
+        num_read += (uint)n;
+    }
+    close(fd);
+
+    return py_lexer_from_str_len(filename, data, num_read, true);
+}
diff --git a/py/machine.h b/py/machine.h
new file mode 100644
index 0000000..fa39c8f
--- /dev/null
+++ b/py/machine.h
@@ -0,0 +1,4 @@
+typedef int64_t machine_int_t; // must be pointer size
+typedef uint64_t machine_uint_t; // must be pointer size
+typedef void *machine_ptr_t; // must be of pointer size
+typedef double machine_float_t;
diff --git a/py/main.c b/py/main.c
new file mode 100644
index 0000000..7b17c38
--- /dev/null
+++ b/py/main.c
@@ -0,0 +1,58 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+#include "compile.h"
+#include "runtime.h"
+
+int main(int argc, char **argv) { // usage: py <file> -- lexes, parses and compiles the file, then runs the result
+    qstr_init();
+    rt_init();
+
+    if (argc != 2) {
+        printf("usage: py <file>\n");
+        return 1;
+    }
+    py_lexer_t *lex = py_lexer_from_file(argv[1]);
+    //const char *pysrc = "def f():\n  x=x+1\n  print(42)\n";
+    //py_lexer_t *lex = py_lexer_from_str_len("<>", pysrc, strlen(pysrc), false);
+    if (lex == NULL) { // the file could not be loaded
+        return 1;
+    }
+
+    if (0) { // debugging aid: change to 1 to dump the token stream instead of compiling
+        while (!py_lexer_is_kind(lex, PY_TOKEN_END)) {
+            py_token_show(py_lexer_cur(lex));
+            py_lexer_to_next(lex);
+        }
+    } else {
+        py_parse_node_t pn = py_parse(lex, 0); // the rule argument is overridden inside py_parse, which parses a whole file
+        //printf("----------------\n");
+        //parse_node_show(pn, 0);
+        //printf("----------------\n");
+        py_compile(pn);
+        //printf("----------------\n");
+    }
+
+    py_lexer_free(lex);
+
+    if (1) {
+        // execute it
+        py_obj_t module_fun = rt_make_function_from_id(1); // NOTE(review): id 1 is presumably the module-level code from py_compile -- confirm
+        if (module_fun != py_const_none) {
+            py_obj_t ret = rt_call_function_0(module_fun);
+            printf("done! got: ");
+            py_obj_print(ret);
+            printf("\n");
+        }
+    }
+
+    rt_deinit();
+
+    //printf("total bytes = %d\n", m_get_total_bytes_allocated());
+    return 0;
+}
diff --git a/py/malloc.c b/py/malloc.c
new file mode 100644
index 0000000..8775f68
--- /dev/null
+++ b/py/malloc.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "misc.h"
+
+static int total_bytes_allocated = 0;
+
+void m_free(void *ptr) {
+    // the C library defines free(NULL) as a no-op,
+    // so a NULL pointer needs no separate test here
+    free(ptr);
+}
+
+void *m_malloc(int num_bytes) {
+    void *mem = NULL; // stays NULL for a zero-byte request or a failed allocation
+    if (num_bytes != 0) {
+        mem = malloc(num_bytes);
+        if (mem != NULL) {
+            total_bytes_allocated += num_bytes; // only successful allocations are counted
+        } else {
+            printf("could not allocate memory, allocating %d bytes\n", num_bytes);
+        }
+    }
+    return mem;
+}
+
+void *m_malloc0(int num_bytes) {
+    void *mem = NULL; // stays NULL for a zero-byte request or a failed allocation
+    if (num_bytes != 0) {
+        mem = calloc(1, num_bytes); // calloc hands back zero-filled memory
+        if (mem != NULL) {
+            total_bytes_allocated += num_bytes;
+        } else {
+            printf("could not allocate memory, allocating %d bytes\n", num_bytes);
+        }
+    }
+    return mem;
+}
+
+void *m_realloc(void *ptr, int num_bytes) {
+    if (num_bytes == 0) { // resizing to nothing releases the block
+        free(ptr);
+        return NULL;
+    }
+    void *resized = realloc(ptr, num_bytes);
+    if (resized != NULL) {
+        total_bytes_allocated += num_bytes; // the whole new size is added to the running total
+    } else {
+        printf("could not allocate memory, reallocating %d bytes\n", num_bytes);
+    }
+    return resized;
+}
+
+int m_get_total_bytes_allocated() {
+    return total_bytes_allocated; // sum of all successful request sizes; never reduced when memory is freed
+}
diff --git a/py/misc.c b/py/misc.c
new file mode 100644
index 0000000..a5bf8d5
--- /dev/null
+++ b/py/misc.c
@@ -0,0 +1,84 @@
+#include <stdint.h>
+#include <string.h>
+
+#include "misc.h"
+
+// attribute flags
+#define FL_PRINT (0x01)
+#define FL_SPACE (0x02)
+#define FL_DIGIT (0x04)
+#define FL_ALPHA (0x08)
+#define FL_UPPER (0x10)
+#define FL_LOWER (0x20)
+
+// shorthand character attributes
+#define AT_PR (FL_PRINT)
+#define AT_SP (FL_SPACE | FL_PRINT)
+#define AT_DI (FL_DIGIT | FL_PRINT)
+#define AT_AL (FL_ALPHA | FL_PRINT)
+#define AT_UP (FL_UPPER | FL_ALPHA | FL_PRINT)
+#define AT_LO (FL_LOWER | FL_ALPHA | FL_PRINT)
+
+// table of attributes for ascii characters
+static const uint8_t attr[] = { // 128 entries, indexed by ASCII code, 8 codes per row
+    0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x07
+    0, AT_SP, AT_SP, AT_SP, 0, AT_SP, 0, 0, // 0x08-0x0f: tab, LF, VT and CR are space; NOTE(review): form feed (0x0c) is not -- confirm intended
+    0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x17
+    0, 0, 0, 0, 0, 0, 0, 0, // 0x18-0x1f
+    AT_SP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, // 0x20-0x27: space, then punctuation
+    AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, // 0x28-0x2f
+    AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, // 0x30-0x37: digits 0-7
+    AT_DI, AT_DI, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, // 0x38-0x3f: digits 8-9, then punctuation
+    AT_PR, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, // 0x40-0x47: @, A-G
+    AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, // 0x48-0x4f: H-O
+    AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, // 0x50-0x57: P-W
+    AT_UP, AT_UP, AT_UP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, // 0x58-0x5f: X-Z, then punctuation (underscore is not alpha)
+    AT_PR, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, // 0x60-0x67: backquote, a-g
+    AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, // 0x68-0x6f: h-o
+    AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, // 0x70-0x77: p-w
+    AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0 // 0x78-0x7f: x-z, punctuation, DEL
+};
+
+unichar g_utf8_get_char(const char *s) {
+    return *s; // returns the single byte at s; no multi-byte UTF-8 decoding is done
+}
+
+char *g_utf8_next_char(const char *s) {
+    return (char*)(s + 1); // always advances by exactly one byte
+}
+
+bool g_unichar_isspace(unichar c) {
+    return 0 <= c && c < 128 && (attr[c] & FL_SPACE) != 0; // attr covers 0..127 only; unichar is signed, so reject negatives too
+}
+
+bool g_unichar_isalpha(unichar c) {
+    return 0 <= c && c < 128 && (attr[c] & FL_ALPHA) != 0; // attr covers 0..127 only; unichar is signed, so reject negatives too
+}
+
+bool g_unichar_isprint(unichar c) {
+    return 0 <= c && c < 128 && (attr[c] & FL_PRINT) != 0; // attr covers 0..127 only; unichar is signed, so reject negatives too
+}
+
+bool g_unichar_isdigit(unichar c) {
+    return 0 <= c && c < 128 && (attr[c] & FL_DIGIT) != 0; // attr covers 0..127 only; unichar is signed, so reject negatives too
+}
+
+/*
+bool char_is_alpha_or_digit(unichar c) {
+    return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0;
+}
+
+bool char_is_upper(unichar c) {
+    return c < 128 && (attr[c] & FL_UPPER) != 0;
+}
+
+bool char_is_lower(unichar c) {
+    return c < 128 && (attr[c] & FL_LOWER) != 0;
+}
+*/
+
+/*
+char *g_strdup(const char *s) {
+    return strdup(s);
+}
+*/
diff --git a/py/misc.h b/py/misc.h
new file mode 100644
index 0000000..9ba80a5
--- /dev/null
+++ b/py/misc.h
@@ -0,0 +1,91 @@
+// a mini library of useful types and functions
+
+#ifndef _INCLUDED_MINILIB_H
+#define _INCLUDED_MINILIB_H
+
+/** types *******************************************************/
+
+typedef int bool;
+enum {
+    false = 0,
+    true = 1
+};
+
+typedef unsigned char byte;
+typedef unsigned int uint;
+
+/** memory allocation *******************************************/
+
+#define m_new(type, num) ((type*)(m_malloc(sizeof(type) * (num))))
+#define m_new0(type, num) ((type*)(m_malloc0(sizeof(type) * (num))))
+#define m_renew(type, ptr, num) ((type*)(m_realloc((ptr), sizeof(type) * (num))))
+
+void m_free(void *ptr);
+void *m_malloc(int num_bytes);
+void *m_malloc0(int num_bytes);
+void *m_realloc(void *ptr, int num_bytes);
+
+int m_get_total_bytes_allocated();
+
+/** unichar / UTF-8 *********************************************/
+
+typedef int unichar; // TODO
+
+unichar g_utf8_get_char(const char *s);
+char *g_utf8_next_char(const char *s);
+
+bool g_unichar_isspace(unichar c);
+bool g_unichar_isalpha(unichar c);
+bool g_unichar_isprint(unichar c);
+bool g_unichar_isdigit(unichar c);
+
+//char *g_strdup(const char *s);
+
+/** blob ********************************************************/
+
+/*
+unsigned short decode_le16(byte *buf);
+unsigned int decode_le32(byte *buf);
+void encode_le16(byte *buf, unsigned short i);
+void encode_le32(byte *buf, unsigned int i);
+*/
+
+/** string ******************************************************/
+
+/*
+#define streq(s1, s2) (strcmp((s1), (s2)) == 0)
+*/
+
+/** variable string *********************************************/
+
+/*
+typedef struct _vstr_t vstr_t;
+
+vstr_t *vstr_new();
+void vstr_free(vstr_t *vstr);
+void vstr_reset(vstr_t *vstr);
+bool vstr_had_error(vstr_t *vstr);
+char *vstr_str(vstr_t *vstr);
+int vstr_len(vstr_t *vstr);
+void vstr_hint_size(vstr_t *vstr, int size);
+char *vstr_add_len(vstr_t *vstr, int len);
+void vstr_add_str(vstr_t *vstr, const char *str);
+void vstr_add_strn(vstr_t *vstr, const char *str, int len);
+void vstr_add_byte(vstr_t *vstr, byte v);
+void vstr_add_le16(vstr_t *vstr, unsigned short v);
+void vstr_add_le32(vstr_t *vstr, unsigned int v);
+void vstr_cut_tail(vstr_t *vstr, int len);
+void vstr_printf(vstr_t *vstr, const char *fmt, ...);
+*/
+
+/** unique string ***********************************************/
+
+typedef unsigned int qstr;
+
+void qstr_init();
+qstr qstr_from_str_static(const char *str);
+qstr qstr_from_str_take(char *str);
+qstr qstr_from_strn_copy(const char *str, int len);
+const char* qstr_str(qstr qstr);
+
+#endif // _INCLUDED_MINILIB_H
diff --git a/py/parse.c b/py/parse.c
new file mode 100644
index 0000000..94a5a5d
--- /dev/null
+++ b/py/parse.c
@@ -0,0 +1,565 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+#include "machine.h"
+#include "parse.h"
+
+#define RULE_ACT_KIND_MASK      (0xf0)
+#define RULE_ACT_ARG_MASK       (0x0f)
+#define RULE_ACT_OR             (0x10)
+#define RULE_ACT_AND            (0x20)
+#define RULE_ACT_LIST           (0x30)
+
+#define RULE_ARG_BLANK          (0x0000)
+#define RULE_ARG_KIND_MASK      (0xf000)
+#define RULE_ARG_ARG_MASK       (0x0fff)
+#define RULE_ARG_TOK            (0x1000)
+#define RULE_ARG_RULE           (0x2000)
+#define RULE_ARG_OPT_TOK        (0x3000)
+#define RULE_ARG_OPT_RULE       (0x4000)
+
+// (un)comment to use rule names; for debugging
+//#define USE_RULE_NAME (1)
+
+typedef struct _rule_t {
+    byte rule_id; // index of this rule in the rules[] table
+    byte act; // RULE_ACT_* kind in the high nibble, argument count or list variant in the low nibble
+#ifdef USE_RULE_NAME
+    const char *rule_name;
+#endif
+    uint16_t arg[]; // each entry: RULE_ARG_* kind in the top 4 bits, token kind or rule id in the low 12
+} rule_t;
+
+enum {
+    RULE_none = 0,
+#define DEF_RULE(rule, comp, kind, arg...) RULE_##rule,
+#include "grammar.h"
+#undef DEF_RULE
+    RULE_maximum_number_of,
+};
+
+#define or(n)                   (RULE_ACT_OR | n)
+#define and(n)                  (RULE_ACT_AND | n)
+#define one_or_more             (RULE_ACT_LIST | 2)
+#define list                    (RULE_ACT_LIST | 1)
+#define list_with_end           (RULE_ACT_LIST | 3)
+#define tok(t)                  (RULE_ARG_TOK | PY_TOKEN_##t)
+#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
+#define opt_tok(t)              (RULE_ARG_OPT_TOK | PY_TOKEN_##t)
+#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)
+#ifdef USE_RULE_NAME
+#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, #rule, { arg } };
+#else
+#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, { arg } };
+#endif
+#include "grammar.h"
+#undef or
+#undef and
+#undef list
+#undef list_with_end
+#undef tok
+#undef rule
+#undef opt_tok
+#undef opt_rule
+#undef one_or_more
+#undef DEF_RULE
+
+static rule_t *rules[] = {
+    NULL,
+#define DEF_RULE(rule, comp, kind, arg...) &rule_##rule,
+#include "grammar.h"
+#undef DEF_RULE
+};
+
+typedef struct _rule_stack_t {
+    byte rule_id; // index into rules[] of the suspended rule
+    int32_t arg_i; // argument position to resume the rule at; what should be the size and signedness?
+} rule_stack_t;
+
+typedef struct _parser_t {
+    uint rule_stack_alloc; // capacity of rule_stack; doubled by push_rule when full
+    uint rule_stack_top; // number of rule_stack entries in use
+    rule_stack_t *rule_stack;
+
+    uint result_stack_top; // number of result_stack entries in use
+    py_parse_node_t *result_stack; // no capacity is recorded for this stack
+} parser_t;
+
+static void push_rule(parser_t *parser, rule_t *rule, int arg_i) {
+    if (parser->rule_stack_top >= parser->rule_stack_alloc) { // stack is full: double its capacity
+        parser->rule_stack_alloc *= 2;
+        parser->rule_stack = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc);
+    }
+    rule_stack_t *slot = &parser->rule_stack[parser->rule_stack_top++];
+    slot->rule_id = rule->rule_id; // only the id is stored; pop_rule maps it back through rules[]
+    slot->arg_i = arg_i;
+}
+
+static void push_rule_from_arg(parser_t *parser, uint arg) {
+    // the argument must reference a rule (mandatory or optional), never a token
+    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
+    assert((arg & RULE_ARG_ARG_MASK) < RULE_maximum_number_of);
+    push_rule(parser, rules[arg & RULE_ARG_ARG_MASK], 0);
+}
+
+static void pop_rule(parser_t *parser, rule_t **rule, uint *arg_i) {
+    const rule_stack_t *top = &parser->rule_stack[--parser->rule_stack_top]; // no underflow check is made here
+    *rule = rules[top->rule_id];
+    *arg_i = top->arg_i;
+}
+
+py_parse_node_t py_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
+    return (py_parse_node_t)(kind | (arg << 4)); // arg is stored above bit 4; kind is expected to fit in the low 4 bits
+}
+
+int num_parse_nodes_allocated = 0;
+py_parse_node_struct_t *parse_node_new_struct(int rule_id, int num_args) {
+    py_parse_node_struct_t *pn = m_malloc(sizeof(py_parse_node_struct_t) + num_args * sizeof(py_parse_node_t)); // header plus num_args child slots
+    pn->source = 0; // TODO
+    pn->kind_num_nodes = (rule_id & 0xff) | (num_args << 8); // rule id in the low byte, child count above it
+    num_parse_nodes_allocated += 1;
+    return pn; // the child slots are not initialised here; the caller fills them in
+}
+
+void parse_node_show(py_parse_node_t pn, int indent) { // debugging aid: prints the tree rooted at pn, one node per line
+    for (int i = 0; i < indent; i++) {
+        printf(" ");
+    }
+    if (PY_PARSE_NODE_IS_NULL(pn)) {
+        printf("NULL\n");
+    } else if (PY_PARSE_NODE_IS_LEAF(pn)) {
+        int arg = PY_PARSE_NODE_LEAF_ARG(pn); // a qstr for most leaf kinds, a plain number for small ints and tokens
+        switch (PY_PARSE_NODE_LEAF_KIND(pn)) {
+            case PY_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
+            case PY_PARSE_NODE_SMALL_INT: printf("int(%d)\n", arg); break;
+            case PY_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break;
+            case PY_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break;
+            case PY_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
+            case PY_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
+            case PY_PARSE_NODE_TOKEN: printf("tok(%d)\n", arg); break;
+            default: assert(0);
+        }
+    } else {
+        py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pn; // anything that is neither null nor a leaf is a struct pointer
+        int n = pns2->kind_num_nodes >> 8; // child count is stored above the low byte
+#ifdef USE_RULE_NAME
+        printf("%s(%d) (n=%d)\n", rules[PY_PARSE_NODE_STRUCT_KIND(pns2)]->rule_name, PY_PARSE_NODE_STRUCT_KIND(pns2), n);
+#else
+        printf("rule(%u) (n=%d)\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns2), n);
+#endif
+        for (int i = 0; i < n; i++) {
+            parse_node_show(pns2->nodes[i], indent + 2); // children are indented two columns deeper
+        }
+    }
+}
+
+/*
+static void result_stack_show(parser_t *parser) {
+    printf("result stack, most recent first\n");
+    for (int i = parser->result_stack_top - 1; i >= 0; i--) {
+        parse_node_show(parser->result_stack[i], 0);
+    }
+}
+*/
+
+static py_parse_node_t pop_result(parser_t *parser) {
+    assert(parser->result_stack_top > 0); // popping an empty stack is a parser bug
+    return parser->result_stack[--parser->result_stack_top]; // removes and returns the most recently pushed node
+}
+
+static py_parse_node_t peek_result(parser_t *parser, int pos) {
+    assert(parser->result_stack_top > pos);
+    return parser->result_stack[parser->result_stack_top - 1 - pos]; // pos 0 is the newest entry, pos 1 the one below it, ...
+}
+
+static void push_result_node(parser_t *parser, py_parse_node_t pn) {
+    parser->result_stack[parser->result_stack_top++] = pn; // NOTE(review): no bounds check against the size result_stack was allocated with
+}
+
+// Push a leaf node for the current token: names/strings/bytes as qstrs, numbers as small int or text, others by kind.
+static void push_result_token(parser_t *parser, const py_lexer_t *lex) {
+    const py_token_t *tok = py_lexer_cur(lex);
+    py_parse_node_t pn;
+    if (tok->kind == PY_TOKEN_NAME) {
+        pn = py_parse_node_new_leaf(PY_PARSE_NODE_ID, qstr_from_strn_copy(tok->str, tok->len));
+    } else if (tok->kind == PY_TOKEN_NUMBER) {
+        bool dec = false;
+        bool small_int = true;
+        int int_val = 0;
+        int len = tok->len;
+        const char *str = tok->str;
+        int base = 10;
+        int i = 0;
+        if (len >= 3 && str[0] == '0') {
+            if (str[1] == 'o' || str[1] == 'O') {
+                base = 8; // octal
+                i = 2;
+            } else if (str[1] == 'x' || str[1] == 'X') {
+                base = 16; // hexadecimal
+                i = 2;
+            } else if (str[1] == 'b' || str[1] == 'B') {
+                base = 2; // binary
+                i = 2;
+            }
+        }
+        for (; i < len; i++) {
+            int digit;
+            if (g_unichar_isdigit(str[i]) && str[i] - '0' < base) {
+                digit = str[i] - '0';
+            } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
+                digit = str[i] - 'a' + 10;
+            } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') { // upper-case hex digits A-F
+                digit = str[i] - 'A' + 10;
+            } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E') {
+                dec = true;
+                break;
+            } else {
+                small_int = false;
+                break;
+            }
+            int_val = int_val > 0xffff ? int_val : base * int_val + digit; // past the small-int range: stop accumulating, avoids signed overflow
+        }
+        if (dec) {
+            pn = py_parse_node_new_leaf(PY_PARSE_NODE_DECIMAL, qstr_from_strn_copy(str, len));
+        } else if (small_int && -0x10000 <= int_val && int_val <= 0xffff) {
+            pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, int_val);
+        } else {
+            pn = py_parse_node_new_leaf(PY_PARSE_NODE_INTEGER, qstr_from_strn_copy(str, len));
+        }
+    } else if (tok->kind == PY_TOKEN_STRING) {
+        pn = py_parse_node_new_leaf(PY_PARSE_NODE_STRING, qstr_from_strn_copy(tok->str, tok->len));
+    } else if (tok->kind == PY_TOKEN_BYTES) {
+        pn = py_parse_node_new_leaf(PY_PARSE_NODE_BYTES, qstr_from_strn_copy(tok->str, tok->len));
+    } else {
+        pn = py_parse_node_new_leaf(PY_PARSE_NODE_TOKEN, tok->kind);
+    }
+    push_result_node(parser, pn);
+}
+
+static void push_result_rule(parser_t *parser, rule_t *rule, int num_args) {
+    py_parse_node_struct_t *node = parse_node_new_struct(rule->rule_id, num_args);
+    for (int slot = num_args - 1; slot >= 0; slot--) { // results pop newest-first, so fill the children from the back
+        node->nodes[slot] = pop_result(parser);
+    }
+    push_result_node(parser, (py_parse_node_t)node); // the finished struct node replaces its children on the stack
+}
+
+py_parse_node_t py_parse(py_lexer_t *lex, int wanted_rule) { // returns the single node left on the result stack, or PY_PARSE_NODE_NULL on a syntax error
+    wanted_rule = RULE_file_input; // NOTE(review): overwrites the caller's argument, so parsing always starts at file_input
+    parser_t *parser = m_new(parser_t, 1);
+    parser->rule_stack_alloc = 64;
+    parser->rule_stack_top = 0;
+    parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc);
+
+    parser->result_stack = m_new(py_parse_node_t, 1000); // NOTE(review): fixed capacity; no growth or bounds check is visible in this function
+    parser->result_stack_top = 0;
+
+    push_rule(parser, rules[wanted_rule], 0);
+
+    uint n, i; // n: argument count of the current rule; i: position within the rule at which to resume
+    bool backtrack = false; // set when the rule just tried failed to match; consumed by the rule popped next
+    rule_t *rule;
+    py_token_kind_t tok_kind;
+    bool emit_rule;
+    bool had_trailing_sep;
+
+    for (;;) {
+        next_rule:
+        if (parser->rule_stack_top == 0) {
+            break;
+        }
+
+        pop_rule(parser, &rule, &i);
+        n = rule->act & RULE_ACT_ARG_MASK;
+
+        /*
+        // debugging
+        printf("depth=%d ", parser->rule_stack_top);
+        for (int j = 0; j < parser->rule_stack_top; ++j) {
+            printf(" ");
+        }
+        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
+        */
+
+        switch (rule->act & RULE_ACT_KIND_MASK) {
+            case RULE_ACT_OR:
+                if (i > 0 && !backtrack) { // an earlier alternative already matched, so this rule is done
+                    goto next_rule;
+                } else {
+                    backtrack = false;
+                }
+                for (; i < n - 1; ++i) { // every alternative except the last, which is handled after the loop
+                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
+                        case RULE_ARG_TOK:
+                            if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
+                                push_result_token(parser, lex);
+                                py_lexer_to_next(lex);
+                                goto next_rule;
+                            }
+                            break;
+                        case RULE_ARG_RULE:
+                            push_rule(parser, rule, i + 1); // come back to this rule at the next alternative
+                            push_rule_from_arg(parser, rule->arg[i]);
+                            goto next_rule;
+                        default:
+                            assert(0);
+                    }
+                }
+                if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { // last alternative: its failure is the failure of the whole rule
+                    if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
+                        push_result_token(parser, lex);
+                        py_lexer_to_next(lex);
+                    } else {
+                        backtrack = true;
+                        goto next_rule;
+                    }
+                } else {
+                    push_rule_from_arg(parser, rule->arg[i]);
+                }
+                break;
+
+            case RULE_ACT_AND:
+
+                // failed, backtrack if we can, else syntax error
+                if (backtrack) {
+                    assert(i > 0);
+                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
+                        // an optional rule that failed, so continue with next arg
+                        push_result_node(parser, PY_PARSE_NODE_NULL);
+                        backtrack = false;
+                    } else {
+                        // a mandatory rule that failed, so propagate backtrack
+                        if (i > 1) {
+                            // already eaten tokens so can't backtrack
+                            goto syntax_error;
+                        } else {
+                            goto next_rule;
+                        }
+                    }
+                }
+
+                // progress through the rule
+                for (; i < n; ++i) {
+                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
+                        case RULE_ARG_TOK:
+                            // need to match a token
+                            tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
+                            if (py_lexer_is_kind(lex, tok_kind)) {
+                                // matched token
+                                if (tok_kind == PY_TOKEN_NAME) {
+                                    push_result_token(parser, lex);
+                                }
+                                py_lexer_to_next(lex);
+                            } else {
+                                // failed to match token
+                                if (i > 0) {
+                                    // already eaten tokens so can't backtrack
+                                    goto syntax_error;
+                                } else {
+                                    // this rule failed, so backtrack
+                                    backtrack = true;
+                                    goto next_rule;
+                                }
+                            }
+                            break;
+                        case RULE_ARG_RULE:
+                            //if (i + 1 < n) {
+                                push_rule(parser, rule, i + 1);
+                            //}
+                            push_rule_from_arg(parser, rule->arg[i]);
+                            goto next_rule;
+                        case RULE_ARG_OPT_RULE:
+                            push_rule(parser, rule, i + 1);
+                            push_rule_from_arg(parser, rule->arg[i]);
+                            goto next_rule;
+                        default:
+                            assert(0);
+                    }
+                }
+
+                assert(i == n);
+
+                // matched the rule, so now build the corresponding parse_node
+
+                // count number of arguments for the parse_node
+                i = 0; // from here on i is reused as the number of results this rule left on the stack
+                emit_rule = false;
+                for (int x = 0; x < n; ++x) {
+                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
+                        tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
+                        if (tok_kind >= PY_TOKEN_NAME) {
+                            emit_rule = true;
+                        }
+                        if (tok_kind == PY_TOKEN_NAME) {
+                            // only tokens which were names are pushed to stack
+                            i += 1;
+                        }
+                    } else {
+                        // rules are always pushed
+                        i += 1;
+                    }
+                }
+
+                // always emit these rules, even if they have only 1 argument
+                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
+                    emit_rule = true;
+                }
+
+                // never emit these rules if they have only 1 argument
+                // NOTE: can't put atom_paren here because we need it to distinguish, for example, [a,b] from [(a,b)]
+                if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_colon || rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef) {
+                    emit_rule = false;
+                }
+
+                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
+                if (rule->rule_id == RULE_funcdef || rule->rule_id == RULE_classdef || rule->rule_id == RULE_comp_for || rule->rule_id == RULE_lambdef || rule->rule_id == RULE_lambdef_nocond) {
+                    emit_rule = true;
+                    push_result_node(parser, PY_PARSE_NODE_NULL);
+                    i += 1;
+                }
+
+                int num_not_nil = 0; // how many of the i results are not PY_PARSE_NODE_NULL
+                for (int x = 0; x < i; ++x) {
+                    if (peek_result(parser, x) != PY_PARSE_NODE_NULL) {
+                        num_not_nil += 1;
+                    }
+                }
+                //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
+                if (emit_rule) {
+                    push_result_rule(parser, rule, i);
+                } else if (num_not_nil == 0) {
+                    push_result_rule(parser, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
+                    //result_stack_show(parser);
+                    //assert(0);
+                } else if (num_not_nil == 1) {
+                    // single result, leave it on stack
+                    py_parse_node_t pn = PY_PARSE_NODE_NULL;
+                    for (int x = 0; x < i; ++x) {
+                        py_parse_node_t pn2 = pop_result(parser);
+                        if (pn2 != PY_PARSE_NODE_NULL) {
+                            pn = pn2;
+                        }
+                    }
+                    push_result_node(parser, pn);
+                } else {
+                    push_result_rule(parser, rule, i);
+                }
+                break;
+
+            case RULE_ACT_LIST:
+                // n=2 is: item item*
+                // n=1 is: item (sep item)*
+                // n=3 is: item (sep item)* [sep]
+                if (backtrack) {
+                    list_backtrack:
+                    had_trailing_sep = false;
+                    if (n == 2) {
+                        if (i == 1) {
+                            // fail on item, first time round; propagate backtrack
+                            goto next_rule;
+                        } else {
+                            // fail on item, in later rounds; finish with this rule
+                            backtrack = false;
+                        }
+                    } else {
+                        if (i == 1) {
+                            // fail on item, first time round; propagate backtrack
+                            goto next_rule;
+                        } else if ((i & 1) == 1) {
+                            // fail on item, in later rounds; have eaten tokens so can't backtrack
+                            if (n == 3) {
+                                // list allows trailing separator; finish parsing list
+                                had_trailing_sep = true;
+                                backtrack = false;
+                            } else {
+                                // list doesn't allow a trailing separator; fail
+                                goto syntax_error;
+                            }
+                        } else {
+                            // fail on separator; finish parsing list
+                            backtrack = false;
+                        }
+                    }
+                } else {
+                    for (;;) {
+                        uint arg = rule->arg[i & 1 & n]; // arg[1] (the separator) only when i and n are both odd, otherwise arg[0] (the item)
+                        switch (arg & RULE_ARG_KIND_MASK) {
+                            case RULE_ARG_TOK:
+                                if (py_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) {
+                                    if (i & 1 & n) {
+                                        // separators which are tokens are not pushed to result stack
+                                    } else {
+                                        push_result_token(parser, lex);
+                                    }
+                                    py_lexer_to_next(lex);
+                                    // got element of list, so continue parsing list
+                                    i += 1;
+                                } else {
+                                    // couldn't get element of list
+                                    i += 1;
+                                    backtrack = true;
+                                    goto list_backtrack;
+                                }
+                                break;
+                            case RULE_ARG_RULE:
+                                push_rule(parser, rule, i + 1);
+                                push_rule_from_arg(parser, arg);
+                                goto next_rule;
+                            default:
+                                assert(0);
+                        }
+                    }
+                }
+                assert(i >= 1);
+
+                // compute number of elements in list, result in i
+                i -= 1;
+                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
+                    // don't count separators when they are tokens
+                    i = (i + 1) / 2;
+                }
+
+                if (i == 1) {
+                    // list matched single item
+                    if (had_trailing_sep) {
+                        // if there was a trailing separator, make a list of a single item
+                        push_result_rule(parser, rule, i);
+                    } else {
+                        // just leave single item on stack (ie don't wrap in a list)
+                    }
+                } else {
+                    //printf("done list %s %d %d\n", rule->rule_name, n, i);
+                    push_result_rule(parser, rule, i);
+                }
+                break;
+
+            default:
+                assert(0);
+        }
+    }
+    if (!py_lexer_is_kind(lex, PY_TOKEN_END)) { // leftover input is reported, but the result below is still returned
+        py_lexer_show_error(lex, "unexpected token at end:");
+        py_token_show(py_lexer_cur(lex));
+    }
+    //printf("--------------\n");
+    //result_stack_show(parser);
+    assert(parser->result_stack_top == 1);
+    //printf("maximum depth: %d\n", parser->rule_stack_alloc);
+    //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
+    return parser->result_stack[0]; // NOTE(review): parser and its two stacks are not freed on this path or on syntax_error
+
+syntax_error:
+    py_lexer_show_error(lex, "syntax error:");
+#ifdef USE_RULE_NAME
+    py_lexer_show_error(lex, rule->rule_name);
+#endif
+    py_token_show(py_lexer_cur(lex));
+    return PY_PARSE_NODE_NULL;
+}
diff --git a/py/parse.h b/py/parse.h
new file mode 100644
index 0000000..07d553c
--- /dev/null
+++ b/py/parse.h
@@ -0,0 +1,54 @@
+struct _py_lexer_t;
+
+// a py_parse_node_t is:
+//  - 0000...0000: no node
+//  - xxxx...0001: an identifier; bits 4 and above are the qstr
+//  - xxxx...0011: a small integer; bits 4 and above are the signed value, 2's complement
+//  - xxxx...0101: an integer; bits 4 and above are the qstr holding the value
+//  - xxxx...0111: a decimal; bits 4 and above are the qstr holding the value
+//  - xxxx...1001: a string; bits 4 and above are the qstr holding the value
+//  - xxxx...1011: a bytes literal; bits 4 and above are the qstr holding the value
+//  - xxxx...1101: a token; bits 4 and above are py_token_kind_t
+//  - xxxx...xxx0: pointer to py_parse_node_struct_t
+
+#define PY_PARSE_NODE_NULL      (0)
+#define PY_PARSE_NODE_ID        (0x1)
+#define PY_PARSE_NODE_SMALL_INT (0x3)
+#define PY_PARSE_NODE_INTEGER   (0x5)
+#define PY_PARSE_NODE_DECIMAL   (0x7)
+#define PY_PARSE_NODE_STRING    (0x9)
+#define PY_PARSE_NODE_BYTES     (0xb)
+#define PY_PARSE_NODE_TOKEN     (0xd)
+
+typedef machine_uint_t py_parse_node_t; // must be pointer size
+
+typedef struct _py_parse_node_struct_t {
+    uint32_t source;            // file identifier, and line number
+    uint32_t kind_num_nodes;    // parse node kind, and number of nodes
+    py_parse_node_t nodes[];    // nodes
+} py_parse_node_struct_t;
+
+// macros for py_parse_node_t usage
+// some of these evaluate their argument more than once
+
+#define PY_PARSE_NODE_IS_NULL(pn) ((pn) == PY_PARSE_NODE_NULL)
+#define PY_PARSE_NODE_IS_LEAF(pn) ((pn) & 1)
+#define PY_PARSE_NODE_IS_STRUCT(pn) ((pn) != PY_PARSE_NODE_NULL && ((pn) & 1) == 0)
+#define PY_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != PY_PARSE_NODE_NULL && ((pn) & 1) == 0 && PY_PARSE_NODE_STRUCT_KIND((py_parse_node_struct_t*)(pn)) == (k))
+
+#define PY_PARSE_NODE_IS_ID(pn) (((pn) & 0xf) == PY_PARSE_NODE_ID)
+#define PY_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0xf) == PY_PARSE_NODE_SMALL_INT)
+#define PY_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0xf) == PY_PARSE_NODE_TOKEN)
+#define PY_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (PY_PARSE_NODE_TOKEN | ((k) << 4))) // true if pn is a token leaf of kind k; k is parenthesised so an expression argument is shifted as a whole
+
+#define PY_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0xf)
+// TODO should probably have int and uint versions of this macro
+#define PY_PARSE_NODE_LEAF_ARG(pn) (((machine_int_t)(pn)) >> 4)
+#define PY_PARSE_NODE_STRUCT_KIND(pns) ((pns)->kind_num_nodes & 0xff)
+#define PY_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
+
+py_parse_node_t py_parse_node_new_leaf(machine_int_t kind, machine_int_t arg);
+
+void parse_node_show(py_parse_node_t pn, int indent);
+
+py_parse_node_t py_parse(struct _py_lexer_t *lex, int wanted_rule);
diff --git a/py/qstr.c b/py/qstr.c
new file mode 100644
index 0000000..33d15c7
--- /dev/null
+++ b/py/qstr.c
@@ -0,0 +1,56 @@
+#include <assert.h>
+#include <string.h>
+
+#include "misc.h"
+
+static int qstrs_alloc;
+static int qstrs_len;
+static const char **qstrs;
+
+void qstr_init() { // allocates the global string table and seeds it with one entry
+    qstrs_alloc = 400;
+    qstrs_len = 1;
+    qstrs = m_new(const char*, qstrs_alloc);
+    qstrs[0] = "nil"; // index 0 is taken by "nil", so strings added later get ids >= 1
+}
+
+static qstr qstr_add(const char *str) {
+    if (qstrs_alloc <= qstrs_len) { // table is full: double its capacity before appending
+        qstrs_alloc += qstrs_alloc;
+        qstrs = m_renew(const char*, qstrs, qstrs_alloc);
+    }
+    qstrs[qstrs_len] = str; // the pointer is stored as given; the string is not copied here
+    return qstrs_len++; // the new id is the index the string was stored at
+}
+
+qstr qstr_from_str_static(const char *str) {
+    int idx = 0; // linear scan of the table for an identical string
+    while (idx < qstrs_len && strcmp(qstrs[idx], str) != 0) {
+        idx++;
+    }
+    // a hit yields the existing id; on a miss str itself (not a copy) is added to the table
+    return idx < qstrs_len ? (qstr)idx : qstr_add(str);
+}
+
+qstr qstr_from_str_take(char *str) {
+    int idx = 0; // linear scan of the table for an identical string
+    while (idx < qstrs_len && strcmp(qstrs[idx], str) != 0) {
+        idx++;
+    }
+    if (idx == qstrs_len) { return qstr_add(str); } // new string: the table keeps the caller's buffer
+    m_free(str); // already present: str is a duplicate, so it is released
+    return idx;
+}
+
+qstr qstr_from_strn_copy(const char *str, int len) {
+    int idx = 0; // look for an entry that is exactly the first len chars of str
+    while (idx < qstrs_len && !(strncmp(qstrs[idx], str, len) == 0 && qstrs[idx][len] == '\0')) {
+        idx++;
+    }
+    // on a miss, a freshly allocated NUL-terminated copy of the prefix is added
+    return idx < qstrs_len ? (qstr)idx : qstr_add(strndup(str, len));
+}
+
+const char *qstr_str(qstr qstr) { // returns the string stored in the table for this id
+    return qstrs[qstr]; // no bounds check: qstr must be an id already in the table
+}
diff --git a/py/runtime.c b/py/runtime.c
new file mode 100644
index 0000000..bf2e2ee
--- /dev/null
+++ b/py/runtime.c
@@ -0,0 +1,944 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "machine.h"
+#include "runtime.h"
+#include "bc.h"
+
+#define DEBUG_printf(args...) (void)0
+//#define DEBUG_printf(args...) printf(args)
+
+#define DEBUG_OP_printf(args...) (void)0
+//#define DEBUG_OP_printf(args...) printf(args)
+
+// float support is enabled when PY_FLOAT is defined (it is tested with #ifdef, so remove the definition to disable it)
+#define PY_FLOAT (1)
+
+typedef machine_int_t py_small_int_t;
+
+#define IS_O(o, k) (((((py_small_int_t)(o)) & 1) == 0) && (((py_obj_base_t*)(o))->kind == (k)))
+#define IS_SMALL_INT(o) (((py_small_int_t)(o)) & 1)
+#define FROM_SMALL_INT(o) (((py_small_int_t)(o)) >> 1)
+#define TO_SMALL_INT(o) ((py_obj_t)(((o) << 1) | 1))
+
+#ifdef PY_FLOAT
+typedef machine_float_t float_t;
+#endif
+
+typedef enum {
+    O_CONST,
+    O_STR,
+#ifdef PY_FLOAT
+    O_FLOAT,
+#endif
+    O_FUN_0,
+    O_FUN_1,
+    O_FUN_2,
+    O_FUN_N,
+    O_FUN_BC,
+    O_BOUND_METH,
+    O_LIST,
+    O_SET,
+    O_MAP,
+    O_CLASS,
+} py_obj_kind_t;
+
+typedef enum {
+    MAP_QSTR,
+    MAP_PY_OBJ,
+} py_map_kind_t;
+
+typedef struct _py_map_elem_t {
+    py_obj_t key;
+    py_obj_t value;
+} py_map_elem_t;
+
+typedef struct _py_map_t {
+    py_map_kind_t kind;
+    machine_uint_t alloc;
+    machine_uint_t used;
+    py_map_elem_t *table;
+} py_map_t;
+
+typedef struct _py_obj_base_t {
+    py_obj_kind_t kind;
+    union {
+        const char *id;
+        qstr u_str;
+#ifdef PY_FLOAT
+        float_t flt;
+#endif
+        struct { // for O_FUN_[012N]
+            void *fun;
+            int n_args;
+        } u_fun;
+        struct { // for O_FUN_BC
+            byte *code;
+            uint len;
+            int n_args;
+        } u_fun_bc;
+        struct { // for O_BOUND_METH
+            py_obj_t meth;
+            py_obj_t self;
+        } u_bound_meth;
+        struct { // for O_LIST
+            int alloc;
+            int len;
+            py_obj_t *items;
+        } u_list;
+        struct { // for O_SET
+            int alloc;
+            int used;
+            py_obj_t *table;
+        } u_set;
+        py_map_t u_map; // for O_MAP
+        /*
+        struct { // for O_MAP
+            int alloc;
+            int used;
+            py_map_elem_t *table;
+        } u_map;
+        */
+        struct { // for O_CLASS
+            py_map_t *map;
+        } u_class;
+    };
+} py_obj_base_t;
+
+py_obj_t py_const_none;
+py_obj_t py_const_false;
+py_obj_t py_const_true;
+
+py_map_t map_name;
+py_map_t map_builtins;
+
+// approximately doubling primes; made with Mathematica command: Table[Prime[Floor[(1.7)^n]], {n, 3, 24}]
+static int doubling_primes[] = {7, 19, 43, 89, 179, 347, 647, 1229, 2297, 4243, 7829, 14347, 26017, 47149, 84947, 152443, 273253, 488399, 869927, 1547173, 2745121, 4861607};
+
+int get_doubling_prime_greater_or_equal_to(int x) {
+    // the table is in ascending order, so the first entry >= x is the smallest such entry
+    const int *end = doubling_primes + sizeof(doubling_primes) / sizeof(int);
+    for (const int *p = doubling_primes; p < end; p++) {
+        if (*p >= x) {
+            return *p;
+        }
+    }
+    return x | 1; // x exceeds every table entry: fall back to x with its low bit set (odd)
+}
+
+void py_map_init(py_map_t *map, py_map_kind_t kind, int n) {
+    map->kind = kind;
+    map->used = 0;
+    map->alloc = get_doubling_prime_greater_or_equal_to(n + 1);
+    map->table = m_new(py_map_elem_t, map->alloc);
+    for (py_map_elem_t *slot = map->table, *end = slot + map->alloc; slot < end; slot++) { // a NULL key marks a slot as empty
+        slot->key = NULL;
+        slot->value = NULL;
+    }
+}
+
+py_map_t *py_map_new(py_map_kind_t kind, int n) { // allocates a map and initialises it with py_map_init
+    py_map_t *map = m_new(py_map_t, 1);
+    py_map_init(map, kind, n);
+    return map;
+}
+
+int py_obj_hash(py_obj_t o_in) {
+    if (IS_SMALL_INT(o_in)) {
+        return FROM_SMALL_INT(o_in); // a small int hashes to its own value
+    }
+    if (IS_O(o_in, O_STR)) {
+        return ((py_obj_base_t*)o_in)->u_str; // a string hashes to its qstr
+    }
+    assert(0); // no other kinds of object are handled
+    return 0;
+}
+
+bool py_obj_equal(py_obj_t o1, py_obj_t o2) {
+    // identical handles are equal; two small ints carry their value in the handle,
+    // so for them handle equality is the whole answer
+    if (o1 == o2 || (IS_SMALL_INT(o1) && IS_SMALL_INT(o2))) {
+        return o1 == o2;
+    }
+    if (IS_O(o1, O_STR) && IS_O(o2, O_STR)) {
+        return ((py_obj_base_t*)o1)->u_str == ((py_obj_base_t*)o2)->u_str;
+    }
+    assert(0); // any other pairing is not handled
+    return false;
+}
+
+// Look up index in map (open addressing, linear probing) and return its slot, or NULL if
+// it is absent and add_if_not_found is false.  With add_if_not_found a missing key is
+// inserted and its slot returned for the caller to fill in; the table grows when needed.
+py_map_elem_t* py_map_lookup_helper(py_map_t *map, py_obj_t index, bool add_if_not_found) {
+    bool is_map_py_obj = (map->kind == MAP_PY_OBJ);
+    // MAP_PY_OBJ keys are hashed as objects; otherwise the key's own bits are the hash
+    machine_uint_t hash = is_map_py_obj ? (machine_uint_t)py_obj_hash(index) : (machine_uint_t)index;
+    uint pos = hash % map->alloc;
+    for (;;) {
+        py_map_elem_t *elem = &map->table[pos];
+        if (elem->key == NULL) {
+            // reached an empty slot, so the key is not in the table
+            if (!add_if_not_found) {
+                return NULL;
+            }
+            if (map->used + 1 < map->alloc) {
+                // there is room (one slot always stays empty so that probing terminates)
+                map->used += 1;
+                elem->key = index;
+                return elem;
+            }
+            // not enough room in table, rehash it into a bigger one
+            int old_alloc = map->alloc;
+            py_map_elem_t *old_table = map->table;
+            // py_map_init picks the next size up and, unlike a bare m_new, marks every
+            // slot empty; probing a table whose keys were never cleared would read garbage
+            py_map_init(map, map->kind, old_alloc);
+            for (int i = 0; i < old_alloc; i++) {
+                if (old_table[i].key != NULL) {
+                    py_map_lookup_helper(map, old_table[i].key, true)->value = old_table[i].value;
+                }
+            }
+            m_free(old_table);
+            // restart the search for the new element
+            pos = hash % map->alloc;
+        } else if (elem->key == index || (is_map_py_obj && py_obj_equal(elem->key, index))) {
+            // found it
+            if (add_if_not_found) {
+                // store the caller's key in place of the equal one already there
+                elem->key = index;
+            }
+            return elem;
+        } else {
+            // not yet found, keep searching in this table
+            pos = (pos + 1) % map->alloc;
+        }
+    }
+}
+
+py_map_elem_t* py_qstr_map_lookup(py_map_t *map, qstr index, bool add_if_not_found) {
+    py_obj_t o = (py_obj_t)(machine_uint_t)index; // the qstr value itself, cast to an object handle, is the key
+    return py_map_lookup_helper(map, o, add_if_not_found);
+}
+
+py_map_elem_t* py_map_lookup(py_obj_t o, py_obj_t index, bool add_if_not_found) {
+    assert(IS_O(o, O_MAP)); // only O_MAP objects use the u_map member accessed below
+    return py_map_lookup_helper(&((py_obj_base_t *)o)->u_map, index, add_if_not_found);
+}
+
+static bool fit_small_int(py_small_int_t o) { // placeholder: performs no range check on o
+    return true;
+}
+
+py_obj_t py_obj_new_const(const char *id) { // allocates an O_CONST object labelled with id
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_CONST;
+    o->id = id; // the pointer is stored, not a copy; py_obj_print prints it as the object's text
+    return (py_obj_t)o;
+}
+
+py_obj_t py_obj_new_str(qstr qstr) { // allocates an O_STR object holding the given qstr
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_STR;
+    o->u_str = qstr;
+    return (py_obj_t)o;
+}
+
+#ifdef PY_FLOAT
+py_obj_t py_obj_new_float(float_t val) { // allocates an O_FLOAT object holding val
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_FLOAT;
+    o->flt = val;
+    return (py_obj_t)o;
+}
+#endif
+
+py_obj_t list_append(py_obj_t self_in, py_obj_t arg) { // appends arg to the O_LIST self_in and returns arg
+    assert(IS_O(self_in, O_LIST));
+    py_obj_base_t *self = self_in;
+    if (self->u_list.len >= self->u_list.alloc) { // full: grow before storing
+        // double the capacity, but never from 0 to 0, which would leave no room for arg
+        self->u_list.alloc = self->u_list.alloc > 0 ? self->u_list.alloc * 2 : 4;
+        self->u_list.items = m_renew(py_obj_t, self->u_list.items, self->u_list.alloc);
+    }
+    self->u_list.items[self->u_list.len++] = arg;
+    return arg;
+}
+
+static qstr q_append;
+static qstr q_print;
+static qstr q_len;
+static qstr q___build_class__;
+
+typedef enum {
+    PY_CODE_NATIVE,
+    PY_CODE_BYTE,
+} py_code_kind_t;
+
+typedef struct _py_code_t {
+    py_code_kind_t kind;
+    int n_args;
+    union {
+        struct {
+            py_fun_t fun;
+        } u_native;
+        struct {
+            byte *code;
+            uint len;
+        } u_byte;
+    };
+} py_code_t;
+
+static int next_unique_code_id;
+static py_code_t *unique_codes;
+
+py_obj_t fun_list_append;
+
+py_obj_t py_builtin_print(py_obj_t o) {
+    if (!IS_O(o, O_STR)) {
+        // general case: the object's printed form, followed by a newline
+        py_obj_print(o);
+        printf("\n");
+        return py_const_none;
+    }
+    // strings are the exception: their text is written raw, without quotes
+    printf("%s\n", qstr_str(((py_obj_base_t*)o)->u_str));
+    return py_const_none;
+}
+
+py_obj_t py_builtin_len(py_obj_t o_in) {
+    // the handle is only dereferenced after IS_O has confirmed the object's kind
+    py_obj_base_t *obj = o_in;
+    py_small_int_t count = 0;
+    if (IS_O(o_in, O_LIST)) {
+        count = obj->u_list.len; // number of items in the list
+    } else if (IS_O(o_in, O_MAP)) {
+        count = obj->u_map.used; // number of occupied slots in the map
+    } else {
+        assert(0); // no other kinds are handled
+    }
+    return TO_SMALL_INT(count);
+}
+
+py_obj_t py_builtin___build_class__(py_obj_t o1, py_obj_t o2) { // NOTE: o1 and o2 are not used yet
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_CLASS;
+    o->u_class.map = py_map_new(MAP_QSTR, 0); // every class object starts with its own empty qstr-keyed map
+    return o;
+}
+
+FILE *fp_native = NULL;
+
+void rt_init() { // sets up the runtime's globals: interned names, constants, name maps, builtins, code table
+    q_append = qstr_from_str_static("append");
+    q_print = qstr_from_str_static("print");
+    q_len = qstr_from_str_static("len");
+    q___build_class__ = qstr_from_str_static("__build_class__");
+
+    py_const_none = py_obj_new_const("None");
+    py_const_false = py_obj_new_const("False");
+    py_const_true = py_obj_new_const("True");
+
+    py_map_init(&map_name, MAP_QSTR, 0);
+
+    py_map_init(&map_builtins, MAP_QSTR, 3); // sized for the three builtins registered below
+    py_qstr_map_lookup(&map_builtins, q_print, true)->value = rt_make_function_1(py_builtin_print);
+    py_qstr_map_lookup(&map_builtins, q_len, true)->value = rt_make_function_1(py_builtin_len);
+    py_qstr_map_lookup(&map_builtins, q___build_class__, true)->value = rt_make_function_2(py_builtin___build_class__);
+
+    next_unique_code_id = 1; // ids are handed out starting from 1
+    unique_codes = NULL; // the table is allocated on the first rt_assign_*_code call
+
+    fun_list_append = rt_make_function_2(list_append);
+
+    fp_native = fopen("out-native", "wb"); // may be NULL on failure; users of fp_native check for that
+}
+
+void rt_deinit() { // closes the native-code dump file opened by rt_init, if it was opened
+    FILE *fp = fp_native;
+    fp_native = NULL; // cleared so a repeated deinit or a later dump cannot touch the closed stream
+    if (fp != NULL) { fclose(fp); }
+}
+
+int rt_get_new_unique_code_id() { // returns the current id and advances the counter
+    return next_unique_code_id++;
+}
+
+void rt_assign_native_code(int unique_code_id, py_fun_t fun, uint len, int n_args) { // records fun as the native code for this id
+    if (unique_codes == NULL) {
+        unique_codes = m_new(py_code_t, next_unique_code_id); // NOTE(review): sized once, on first use; ids handed out afterwards would index past the end
+    }
+    assert(unique_code_id < next_unique_code_id);
+    unique_codes[unique_code_id].kind = PY_CODE_NATIVE;
+    unique_codes[unique_code_id].n_args = n_args;
+    unique_codes[unique_code_id].u_native.fun = fun;
+
+    DEBUG_printf("assign native code: id=%d fun=%p len=%u n_args=%d\n", unique_code_id, fun, len, n_args);
+    byte *fun_data = (byte*)(((machine_uint_t)fun) & (~1)); // need to clear lower bit in case it's thumb code
+    for (int i = 0; i < 128 && i < len; i++) { // debug hex dump of at most the first 128 bytes
+        if (i > 0 && i % 16 == 0) {
+            DEBUG_printf("\n");
+        }
+        DEBUG_printf(" %02x", fun_data[i]);
+    }
+    DEBUG_printf("\n");
+
+    if (fp_native != NULL) {
+        fwrite(fun_data, len, 1, fp_native); // append the raw machine code to the dump file; the result is not checked
+    }
+}
+
+void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args) { // records code as the byte code for this id
+    if (unique_codes == NULL) {
+        unique_codes = m_new(py_code_t, next_unique_code_id); // NOTE(review): sized once, on first use; ids handed out afterwards would index past the end
+    }
+    assert(unique_code_id < next_unique_code_id);
+    unique_codes[unique_code_id].kind = PY_CODE_BYTE;
+    unique_codes[unique_code_id].n_args = n_args;
+    unique_codes[unique_code_id].u_byte.code = code; // the pointer is stored; the bytes are not copied
+    unique_codes[unique_code_id].u_byte.len = len;
+
+    DEBUG_printf("assign byte code: id=%d code=%p len=%u n_args=%d\n", unique_code_id, code, len, n_args);
+}
+
+const char *py_obj_get_type_str(py_obj_t o_in) { // returns a type name for the object as a string literal
+    if (IS_SMALL_INT(o_in)) {
+        return "int";
+    } else {
+        py_obj_base_t *o = o_in;
+        switch (o->kind) {
+            case O_CONST:
+                if (o == py_const_none) {
+                    return "NoneType";
+                } else {
+                    return "bool"; // every O_CONST other than None is reported as bool
+                }
+            case O_STR:
+                return "str";
+#ifdef PY_FLOAT
+            case O_FLOAT:
+                return "float";
+#endif
+            case O_LIST:
+                return "list";
+            case O_SET:
+                return "set";
+            case O_MAP:
+                return "dict";
+            default:
+                assert(0); // function, bound-method and class kinds have no name here
+                return "UnknownType";
+        }
+    }
+}
+
+void py_obj_print(py_obj_t o_in) { // writes the object to stdout with no trailing newline; containers recurse into their elements
+    if (IS_SMALL_INT(o_in)) {
+        printf("%d", (int)FROM_SMALL_INT(o_in));
+    } else {
+        py_obj_base_t *o = o_in;
+        switch (o->kind) {
+            case O_CONST:
+                printf("%s", o->id);
+                break;
+            case O_STR:
+                // TODO need to escape chars etc
+                printf("'%s'", qstr_str(o->u_str));
+                break;
+#ifdef PY_FLOAT
+            case O_FLOAT:
+                printf("%f", o->flt);
+                break;
+#endif
+            case O_LIST:
+                printf("[");
+                for (int i = 0; i < o->u_list.len; i++) {
+                    if (i > 0) {
+                        printf(", ");
+                    }
+                    py_obj_print(o->u_list.items[i]);
+                }
+                printf("]");
+                break;
+            case O_SET:
+            {
+                bool first = true;
+                printf("{");
+                for (int i = 0; i < o->u_set.alloc; i++) { // walks every slot, so elements appear in table order
+                    if (o->u_set.table[i] != NULL) {
+                        if (!first) {
+                            printf(", ");
+                        }
+                        first = false;
+                        py_obj_print(o->u_set.table[i]);
+                    }
+                }
+                printf("}");
+                break;
+            }
+            case O_MAP:
+            {
+                bool first = true;
+                printf("{");
+                for (int i = 0; i < o->u_map.alloc; i++) { // walks every slot, so entries appear in table order
+                    if (o->u_map.table[i].key != NULL) {
+                        if (!first) {
+                            printf(", ");
+                        }
+                        first = false;
+                        py_obj_print(o->u_map.table[i].key);
+                        printf(": ");
+                        py_obj_print(o->u_map.table[i].value);
+                    }
+                }
+                printf("}");
+                break;
+            }
+            default:
+                assert(0); // the remaining object kinds cannot be printed
+        }
+    }
+}
+
+// Truth test for the object kinds supported so far.
+// Returns 0 for the small int zero, None and False, and 1 for any other small int
+// and for True.  Any other object trips the assert (and yields 0 if asserts are off).
+int rt_is_true(py_obj_t arg) {
+    DEBUG_OP_printf("is true %p\n", arg);
+
+    if (IS_SMALL_INT(arg)) {
+        return FROM_SMALL_INT(arg) != 0;
+    }
+    if (arg == py_const_none || arg == py_const_false) {
+        return 0;
+    }
+    if (arg == py_const_true) {
+        return 1;
+    }
+    // no other object kind is handled yet
+    assert(0);
+    return 0;
+}
+
+// Unbox a small int into a C int; any other object trips the assert (0 if asserts are off).
+int rt_get_int(py_obj_t arg) {
+    if (!IS_SMALL_INT(arg)) {
+        assert(0);
+        return 0;
+    }
+    return FROM_SMALL_INT(arg);
+}
+
+py_obj_t rt_load_const_str(qstr qstr) { // wrap qstr in a str object via py_obj_new_str
+    DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
+    return py_obj_new_str(qstr);
+}
+
+// Look qstr up in map_name and then in map_builtins; an unknown name is printed and asserts.
+py_obj_t rt_load_name(qstr qstr) {
+    DEBUG_OP_printf("load %s\n", qstr_str(qstr));
+    py_map_elem_t *found = py_qstr_map_lookup(&map_name, qstr, false);
+    if (found == NULL) {
+        found = py_qstr_map_lookup(&map_builtins, qstr, false);
+    }
+    if (found == NULL) {
+        printf("name doesn't exist: %s\n", qstr_str(qstr));
+        assert(0);
+    }
+    return found->value;
+}
+
+py_obj_t rt_load_global(qstr qstr) { // currently identical to rt_load_name
+    return rt_load_name(qstr); // TODO
+}
+
+py_obj_t rt_load_build_class() { // return the value stored under __build_class__ in map_builtins
+    DEBUG_OP_printf("load_build_class\n");
+    py_map_elem_t *elem = py_qstr_map_lookup(&map_builtins, q___build_class__, false);
+    if (elem == NULL) {
+        printf("name doesn't exist: __build_class__\n");
+        assert(0); // NOTE(review): with asserts compiled out, the return below dereferences NULL
+    }
+    return elem->value;
+}
+
+void rt_store_name(qstr qstr, py_obj_t obj) { // store obj under qstr in map_name
+    DEBUG_OP_printf("store %s <- %p\n", qstr_str(qstr), obj);
+    py_qstr_map_lookup(&map_name, qstr, true)->value = obj; // presumably `true` makes the lookup create a missing entry - confirm
+}
+
+py_obj_t rt_unary_op(int op, py_obj_t arg) { // stub: no unary operator is handled yet
+    assert(0);
+    return py_const_none; // only reached when asserts are compiled out
+}
+
+py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs) { // apply op (an rt_binary_op_t); unsupported combinations assert
+    DEBUG_OP_printf("binary %d %p %p\n", op, lhs, rhs);
+    if (op == RT_BINARY_OP_SUBSCR) {
+        if (IS_O(lhs, O_LIST) && IS_SMALL_INT(rhs)) {
+            py_obj_base_t *list = lhs;
+            int idx = FROM_SMALL_INT(rhs);
+            idx += (idx < 0) ? list->u_list.len : 0; // negative index counts from the end, as in rt_store_subscr
+            if (0 <= idx && idx < list->u_list.len) {
+                return list->u_list.items[idx];
+            }
+        } // wrong operand types or index out of range: fall through to the assert at the end
+    } else if (IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs)) {
+        py_small_int_t val;
+        switch (op) {
+            case RT_BINARY_OP_ADD: case RT_BINARY_OP_INPLACE_ADD: val = FROM_SMALL_INT(lhs) + FROM_SMALL_INT(rhs); break;
+            case RT_BINARY_OP_SUBTRACT: val = FROM_SMALL_INT(lhs) - FROM_SMALL_INT(rhs); break;
+            case RT_BINARY_OP_MULTIPLY: val = FROM_SMALL_INT(lhs) * FROM_SMALL_INT(rhs); break;
+            case RT_BINARY_OP_FLOOR_DIVIDE: val = FROM_SMALL_INT(lhs) / FROM_SMALL_INT(rhs); break; // NOTE(review): C truncates towards zero and does not check for a zero divisor
+#ifdef PY_FLOAT
+            case RT_BINARY_OP_TRUE_DIVIDE: return py_obj_new_float((float_t)FROM_SMALL_INT(lhs) / (float_t)FROM_SMALL_INT(rhs));
+#endif
+            default: printf("%d\n", op); assert(0); val = 0;
+        }
+        if (fit_small_int(val)) { // a result too large for a small int falls through to the final assert
+            return TO_SMALL_INT(val);
+        }
+    } else if (IS_O(lhs, O_STR) && IS_O(rhs, O_STR)) {
+        const char *lhs_str = qstr_str(((py_obj_base_t*)lhs)->u_str);
+        const char *rhs_str = qstr_str(((py_obj_base_t*)rhs)->u_str);
+        char *val;
+        switch (op) {
+            case RT_BINARY_OP_ADD: case RT_BINARY_OP_INPLACE_ADD: val = m_new(char, strlen(lhs_str) + strlen(rhs_str) + 1); strcpy(val, lhs_str); strcat(val, rhs_str); break;
+            default: printf("%d\n", op); assert(0); val = NULL;
+        }
+        return py_obj_new_str(qstr_from_str_take(val)); // the concatenation buffer is handed to qstr_from_str_take
+    }
+    assert(0);
+    return py_const_none;
+}
+
+py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs) { // compare two small ints; returns py_const_true or py_const_false
+    DEBUG_OP_printf("compare %d %p %p\n", op, lhs, rhs);
+    if (IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs)) {
+        int cmp;
+        switch (op) { // op is an rt_compare_op_t value
+            case RT_COMPARE_OP_LESS: cmp = FROM_SMALL_INT(lhs) < FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_MORE: cmp = FROM_SMALL_INT(lhs) > FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_EQUAL: cmp = FROM_SMALL_INT(lhs) == FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_LESS_EQUAL: cmp = FROM_SMALL_INT(lhs) <= FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_MORE_EQUAL: cmp = FROM_SMALL_INT(lhs) >= FROM_SMALL_INT(rhs); break;
+            case RT_COMPARE_OP_NOT_EQUAL: cmp = FROM_SMALL_INT(lhs) != FROM_SMALL_INT(rhs); break;
+            default: assert(0); cmp = 0; // in, not in, is, is not and exception match are not handled
+        }
+        return cmp ? py_const_true : py_const_false;
+    }
+    assert(0); // only small-int operands are supported so far
+    return py_const_none;
+}
+
+// Create a function object for the code stored in unique_codes[unique_code_id]; an illegal id yields py_const_none.
+py_obj_t rt_make_function_from_id(int unique_code_id) {
+    if (unique_code_id < 0 || unique_code_id >= next_unique_code_id) {
+        return py_const_none; // illegal code id (a negative id would index before unique_codes)
+    }
+    py_code_t *c = &unique_codes[unique_code_id];
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    switch (c->kind) {
+        case PY_CODE_NATIVE: // native code: the object kind encodes the argument count (0, 1 or 2)
+            switch (c->n_args) {
+                case 0: o->kind = O_FUN_0; break;
+                case 1: o->kind = O_FUN_1; break;
+                case 2: o->kind = O_FUN_2; break;
+                default: assert(0);
+            }
+            o->u_fun.fun = c->u_native.fun;
+            break;
+        case PY_CODE_BYTE: // byte code: keep the code pointer, its length and the argument count
+            o->kind = O_FUN_BC;
+            o->u_fun_bc.code = c->u_byte.code;
+            o->u_fun_bc.len = c->u_byte.len;
+            o->u_fun_bc.n_args = c->n_args;
+            break;
+        default:
+            assert(0);
+    }
+    return o;
+}
+
+py_obj_t rt_make_function_0(py_fun_0_t fun) { // new O_FUN_0 object holding fun (callable via rt_call_function_0)
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_FUN_0;
+    o->u_fun.fun = fun;
+    return o;
+}
+
+py_obj_t rt_make_function_1(py_fun_1_t fun) { // new O_FUN_1 object holding fun (callable via rt_call_function_1)
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_FUN_1;
+    o->u_fun.fun = fun;
+    return o;
+}
+
+py_obj_t rt_make_function_2(py_fun_2_t fun) { // new O_FUN_2 object holding fun (callable via rt_call_function_2)
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_FUN_2;
+    o->u_fun.fun = fun;
+    return o;
+}
+
+py_obj_t rt_make_function(int n_args, py_fun_t code) { // new O_FUN_N object: function pointer plus its argument count
+    // assumes code is a pointer to a py_fun_t (author's note: believed to be safe, not verified)
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_FUN_N;
+    o->u_fun.fun = code;
+    o->u_fun.n_args = n_args;
+    return o;
+}
+
+// Call fun with no arguments: an O_FUN_0 directly, an O_FUN_BC via the byte code executor; else assert.
+py_obj_t rt_call_function_0(py_obj_t fun) {
+    py_obj_base_t *callee = fun;
+    if (IS_O(fun, O_FUN_0)) {
+        DEBUG_OP_printf("calling native %p...\n", callee->u_fun.fun);
+        return ((py_fun_0_t)callee->u_fun.fun)();
+    }
+    if (IS_O(fun, O_FUN_BC)) {
+        assert(callee->u_fun_bc.n_args == 0);
+        DEBUG_OP_printf("calling byte code %p...\n", callee->u_fun_bc.code);
+        return py_execute_byte_code(callee->u_fun_bc.code, callee->u_fun_bc.len, NULL, 0);
+    }
+    printf("fun0:%p\n", fun);
+    assert(0);
+    return py_const_none;
+}
+
+// Call fun with one argument; a bound method is forwarded to rt_call_function_2 with its self prepended.
+py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg) {
+    py_obj_base_t *callee = fun;
+    if (IS_O(fun, O_FUN_1)) {
+        DEBUG_OP_printf("calling native %p...\n", callee->u_fun.fun);
+        return ((py_fun_1_t)callee->u_fun.fun)(arg);
+    }
+    if (IS_O(fun, O_FUN_BC)) {
+        assert(callee->u_fun_bc.n_args == 1);
+        DEBUG_OP_printf("calling byte code %p...\n", callee->u_fun_bc.code);
+        return py_execute_byte_code(callee->u_fun_bc.code, callee->u_fun_bc.len, &arg, 1);
+    }
+    if (IS_O(fun, O_BOUND_METH)) {
+        return rt_call_function_2(callee->u_bound_meth.meth, callee->u_bound_meth.self, arg);
+    }
+    printf("fun1:%p\n", fun);
+    assert(0);
+    return py_const_none;
+}
+
+// Call fun with two arguments.  An O_FUN_2 object is called directly; an O_FUN_BC
+// object (which must take exactly two arguments) is run by py_execute_byte_code
+// with arg1 and arg2 passed as an array.  Any other object trips the assert.
+py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2) {
+    py_obj_base_t *callee = fun;
+    if (IS_O(fun, O_FUN_2)) {
+        DEBUG_OP_printf("calling native %p...\n", callee->u_fun.fun);
+        return ((py_fun_2_t)callee->u_fun.fun)(arg1, arg2);
+    }
+    if (IS_O(fun, O_FUN_BC)) {
+        assert(callee->u_fun_bc.n_args == 2);
+        DEBUG_OP_printf("calling byte code %p...\n", callee->u_fun_bc.code);
+        py_obj_t args[2] = {arg1, arg2};
+        return py_execute_byte_code(callee->u_fun_bc.code, callee->u_fun_bc.len, args, 2);
+    }
+    assert(0);
+    return py_const_none;
+}
+
+// Call fun with self as its only argument, or with no arguments when self is NULL.
+py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self) {
+    DEBUG_OP_printf("call method %p %p\n", fun, self);
+    if (self != NULL) {
+        return rt_call_function_1(fun, self);
+    }
+    return rt_call_function_0(fun);
+}
+
+// Call fun with (self, arg), or with arg alone when self is NULL.
+py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg) {
+    DEBUG_OP_printf("call method %p %p %p\n", fun, self, arg);
+    if (self != NULL) {
+        return rt_call_function_2(fun, self, arg);
+    }
+    return rt_call_function_1(fun, arg);
+}
+
+// Build a list object from n_args items.  items are in reverse order, so
+// items[n_args - 1] becomes element 0 of the list.  Room for at least 4
+// elements is always allocated.
+py_obj_t rt_build_list(int n_args, py_obj_t *items) {
+    py_obj_base_t *list = m_new(py_obj_base_t, 1);
+    list->kind = O_LIST;
+    list->u_list.alloc = (n_args < 4) ? 4 : n_args;
+    list->u_list.len = n_args;
+    list->u_list.items = m_new(py_obj_t, list->u_list.alloc);
+    // fill front to back while reading the source from its end
+    for (int dst = 0, src = n_args - 1; dst < n_args; dst++, src--) {
+        list->u_list.items[dst] = items[src];
+    }
+    return list;
+}
+
+// Find index in set o_in (open addressing with linear probing, NULL marks a free slot).
+// Returns the stored element equal to index, or NULL if absent and !add_if_not_found;
+// otherwise index is inserted, growing and rehashing a nearly full table, and returned.
+py_obj_t py_set_lookup(py_obj_t o_in, py_obj_t index, bool add_if_not_found) {
+    assert(IS_O(o_in, O_SET));
+    py_obj_base_t *o = o_in;
+    unsigned int hash = py_obj_hash(index); // unsigned, so a negative hash cannot produce a negative slot
+    int pos = hash % o->u_set.alloc;
+    for (;;) {
+        py_obj_t elem = o->u_set.table[pos];
+        if (elem != NULL) {
+            if (py_obj_equal(elem, index)) {
+                return elem; // found it
+            }
+            pos = (pos + 1) % o->u_set.alloc; // not yet found, keep searching in this table
+            continue;
+        }
+        if (!add_if_not_found) {
+            return NULL; // not in table
+        }
+        if (o->u_set.used + 1 < o->u_set.alloc) {
+            o->u_set.used += 1;
+            o->u_set.table[pos] = index;
+            return index;
+        }
+        int old_alloc = o->u_set.alloc; // not enough room in table, rehash it
+        py_obj_t *old_table = o->u_set.table;
+        o->u_set.alloc = get_doubling_prime_greater_or_equal_to(o->u_set.alloc + 1);
+        o->u_set.used = 0;
+        o->u_set.table = m_new(py_obj_t, o->u_set.alloc);
+        for (int i = 0; i < o->u_set.alloc; i++) {
+            o->u_set.table[i] = NULL; // the fresh table must start empty, as rt_build_set also ensures
+        }
+        for (int i = 0; i < old_alloc; i++) {
+            if (old_table[i] != NULL) {
+                py_set_lookup(o, old_table[i], true);
+            }
+        }
+        m_free(old_table);
+        pos = hash % o->u_set.alloc; // restart the search for the new element
+    }
+}
+
+py_obj_t rt_build_set(int n_args, py_obj_t *items) { // new set holding the n_args items; equal items are stored once
+    py_obj_base_t *set = m_new(py_obj_base_t, 1);
+    set->kind = O_SET;
+    set->u_set.used = 0;
+    set->u_set.alloc = get_doubling_prime_greater_or_equal_to(n_args + 1);
+    set->u_set.table = m_new(py_obj_t, set->u_set.alloc);
+    for (int slot = set->u_set.alloc - 1; slot >= 0; slot--) {
+        set->u_set.table[slot] = NULL; // every slot starts out empty
+    }
+    for (int k = 0; k < n_args; k++) {
+        py_set_lookup(set, items[k], true);
+    }
+    return set;
+}
+
+py_obj_t rt_build_map(int n_args) { // new O_MAP object whose map is set up by py_map_init
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_MAP;
+    py_map_init(&o->u_map, MAP_PY_OBJ, n_args); // n_args is passed through, presumably as the initial size - confirm
+    return o;
+}
+
+py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value) { // store value under key in map and return map
+    assert(IS_O(map, O_MAP)); // should always be
+    py_map_lookup(map, key, true)->value = value;
+    return map;
+}
+
+// Implement base[index] = value for a list (small-int index, negative values count from
+// the end) or a map.  Any other base, or a list index out of range, trips the assert.
+void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t value) {
+    if (IS_O(base, O_LIST) && IS_SMALL_INT(index)) {
+        py_obj_base_t *list = base;
+        int pos = FROM_SMALL_INT(index);
+        if (pos < 0) {
+            pos += list->u_list.len;
+        }
+        if (pos < 0 || pos >= list->u_list.len) {
+            assert(0);
+        } else {
+            list->u_list.items[pos] = value;
+        }
+    } else if (IS_O(base, O_MAP)) {
+        py_map_lookup(base, index, true)->value = value;
+    } else {
+        assert(0);
+    }
+}
+
+py_obj_t build_bound_method(py_obj_t self, py_obj_t meth) { // new O_BOUND_METH object pairing meth with self
+    py_obj_base_t *o = m_new(py_obj_base_t, 1);
+    o->kind = O_BOUND_METH;
+    o->u_bound_meth.meth = meth; // rt_call_function_1 calls meth with self as the first argument
+    o->u_bound_meth.self = self;
+    return o;
+}
+
+// Load attribute attr of base: append on a list gives a bound method, a class is looked up in its map; else assert.
+py_obj_t rt_load_attr(py_obj_t base, qstr attr) {
+    DEBUG_OP_printf("load %s\n", qstr_str(attr));
+    if (IS_O(base, O_LIST) && attr == q_append) {
+        return build_bound_method(base, fun_list_append);
+    }
+    if (IS_O(base, O_CLASS)) {
+        py_map_elem_t *found = py_qstr_map_lookup(((py_obj_base_t*)base)->u_class.map, attr, false);
+        if (found == NULL) {
+            printf("Nope! %s\n", qstr_str(attr));
+            assert(0);
+        }
+        return found->value;
+    }
+    printf("AttributeError: '%s' object has no attribute '%s'\n", py_obj_get_type_str(base), qstr_str(attr));
+    assert(0);
+    return py_const_none;
+}
+
+void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest) { // dest[1] = callable, dest[0] = self or NULL
+    DEBUG_OP_printf("load method %s\n", qstr_str(attr));
+    if (IS_O(base, O_LIST) && attr == q_append) {
+        dest[1] = fun_list_append; // list.append: hand back the raw function plus the list as self
+        dest[0] = base;
+        return;
+    }
+    dest[1] = rt_load_attr(base, attr); // anything else is loaded as a plain attribute, with no self
+    dest[0] = NULL;
+}
+
+void *rt_fun_table[RT_F_NUMBER_OF] = { // indexed by rt_fun_kind_t: entries must stay in the order of that enum
+    rt_load_const_str, // RT_F_LOAD_CONST_STR
+    rt_load_name, // RT_F_LOAD_NAME
+    rt_load_global, // RT_F_LOAD_GLOBAL
+    rt_load_attr, // RT_F_LOAD_ATTR
+    rt_load_method, // RT_F_LOAD_METHOD
+    rt_store_name, // RT_F_STORE_NAME
+    rt_store_subscr, // RT_F_STORE_SUBSCR
+    rt_is_true, // RT_F_IS_TRUE
+    rt_unary_op, // RT_F_UNARY_OP
+    rt_build_list, // RT_F_BUILD_LIST
+    rt_build_map, // RT_F_BUILD_MAP
+    rt_store_map, // RT_F_STORE_MAP
+    rt_build_set, // RT_F_BUILD_SET
+    rt_make_function_from_id, // RT_F_MAKE_FUNCTION_FROM_ID
+    rt_call_function_0, // RT_F_CALL_FUNCTION_0
+    rt_call_function_1, // RT_F_CALL_FUNCTION_1
+    rt_call_function_2, // RT_F_CALL_FUNCTION_2
+    rt_call_method_1, // RT_F_CALL_METHOD_1
+    rt_call_method_2, // RT_F_CALL_METHOD_2
+    rt_binary_op, // RT_F_BINARY_OP
+    rt_compare_op, // RT_F_COMPARE_OP
+};
+
+/*
+void rt_f_vector(rt_fun_kind_t fun_kind) {
+    (rt_f_table[fun_kind])();
+}
+*/
diff --git a/py/runtime.h b/py/runtime.h
new file mode 100644
index 0000000..4c842b2
--- /dev/null
+++ b/py/runtime.h
@@ -0,0 +1,121 @@
+typedef enum { // unary operator codes, presumably the op argument of rt_unary_op
+    RT_UNARY_OP_NOT,
+    RT_UNARY_OP_POSITIVE,
+    RT_UNARY_OP_NEGATIVE,
+    RT_UNARY_OP_INVERT,
+} rt_unary_op_t;
+
+typedef enum { // binary operator codes, tested against the op argument of rt_binary_op
+    RT_BINARY_OP_SUBSCR, // lhs[rhs]
+    RT_BINARY_OP_OR,
+    RT_BINARY_OP_XOR,
+    RT_BINARY_OP_AND,
+    RT_BINARY_OP_LSHIFT,
+    RT_BINARY_OP_RSHIFT,
+    RT_BINARY_OP_ADD,
+    RT_BINARY_OP_SUBTRACT,
+    RT_BINARY_OP_MULTIPLY,
+    RT_BINARY_OP_FLOOR_DIVIDE,
+    RT_BINARY_OP_TRUE_DIVIDE,
+    RT_BINARY_OP_MODULO,
+    RT_BINARY_OP_POWER,
+    RT_BINARY_OP_INPLACE_OR, // the INPLACE_ codes follow in the same order as the plain operators above
+    RT_BINARY_OP_INPLACE_XOR,
+    RT_BINARY_OP_INPLACE_AND,
+    RT_BINARY_OP_INPLACE_LSHIFT,
+    RT_BINARY_OP_INPLACE_RSHIFT,
+    RT_BINARY_OP_INPLACE_ADD,
+    RT_BINARY_OP_INPLACE_SUBTRACT,
+    RT_BINARY_OP_INPLACE_MULTIPLY,
+    RT_BINARY_OP_INPLACE_FLOOR_DIVIDE,
+    RT_BINARY_OP_INPLACE_TRUE_DIVIDE,
+    RT_BINARY_OP_INPLACE_MODULO,
+    RT_BINARY_OP_INPLACE_POWER,
+} rt_binary_op_t;
+
+typedef enum { // comparison operator codes, tested against the op argument of rt_compare_op
+    RT_COMPARE_OP_LESS,
+    RT_COMPARE_OP_MORE,
+    RT_COMPARE_OP_EQUAL,
+    RT_COMPARE_OP_LESS_EQUAL,
+    RT_COMPARE_OP_MORE_EQUAL,
+    RT_COMPARE_OP_NOT_EQUAL,
+    RT_COMPARE_OP_IN,
+    RT_COMPARE_OP_NOT_IN,
+    RT_COMPARE_OP_IS,
+    RT_COMPARE_OP_IS_NOT,
+    RT_COMPARE_OP_EXCEPTION_MATCH,
+} rt_compare_op_t;
+
+typedef enum { // indices into rt_fun_table; keep in step with that table's initialiser in runtime.c
+    RT_F_LOAD_CONST_STR = 0,
+    RT_F_LOAD_NAME,
+    RT_F_LOAD_GLOBAL,
+    RT_F_LOAD_ATTR,
+    RT_F_LOAD_METHOD,
+    RT_F_STORE_NAME,
+    RT_F_STORE_SUBSCR,
+    RT_F_IS_TRUE,
+    RT_F_UNARY_OP,
+    RT_F_BUILD_LIST,
+    RT_F_BUILD_MAP,
+    RT_F_STORE_MAP,
+    RT_F_BUILD_SET,
+    RT_F_MAKE_FUNCTION_FROM_ID,
+    RT_F_CALL_FUNCTION_0,
+    RT_F_CALL_FUNCTION_1,
+    RT_F_CALL_FUNCTION_2,
+    RT_F_CALL_METHOD_1,
+    RT_F_CALL_METHOD_2,
+    RT_F_BINARY_OP,
+    RT_F_COMPARE_OP,
+    RT_F_NUMBER_OF, // number of entries, used as the size of rt_fun_table
+} rt_fun_kind_t;
+
+extern void *rt_fun_table[RT_F_NUMBER_OF]; // runtime entry points, indexed by rt_fun_kind_t
+
+typedef machine_ptr_t py_obj_t; // must be of pointer size
+typedef py_obj_t (*py_fun_0_t)(); // function taking no arguments
+typedef py_obj_t (*py_fun_1_t)(py_obj_t); // function taking one argument
+typedef py_obj_t (*py_fun_2_t)(py_obj_t, py_obj_t); // function taking two arguments
+typedef py_obj_t (*py_fun_t)(); // function with an unspecified argument list
+
+extern py_obj_t py_const_none; // also returned by several rt_ functions after a failed assert
+extern py_obj_t py_const_false;
+extern py_obj_t py_const_true;
+
+void rt_init();
+void rt_deinit();
+int rt_get_new_unique_code_id();
+void rt_assign_native_code(int unique_code_id, py_fun_t f, uint len, int n_args);
+void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args);
+py_fun_t rt_get_code(qstr id);
+void py_obj_print(py_obj_t o); // prints to stdout, no trailing newline
+int rt_is_true(py_obj_t arg); // returns 0 or 1
+int rt_get_int(py_obj_t arg); // arg must be a small int
+py_obj_t rt_load_const_str(qstr qstr);
+//py_obj_t rt_load_const_code(qstr qstr);
+py_obj_t rt_load_name(qstr qstr); // searches map_name, then map_builtins
+py_obj_t rt_load_global(qstr qstr); // currently the same as rt_load_name
+py_obj_t rt_load_build_class();
+void rt_store_name(qstr qstr, py_obj_t obj);
+py_obj_t rt_unary_op(int op, py_obj_t arg); // not implemented yet
+py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs); // op is an rt_binary_op_t value
+py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs); // op is an rt_compare_op_t value
+py_obj_t rt_make_function_from_id(int unique_code_id);
+py_obj_t rt_make_function_0(py_fun_0_t f);
+py_obj_t rt_make_function_1(py_fun_1_t f);
+py_obj_t rt_make_function_2(py_fun_2_t f);
+py_obj_t rt_make_function(int n_args, py_fun_t code);
+py_obj_t rt_call_function_0(py_obj_t fun);
+py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg);
+py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2);
+py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self); // self may be NULL: fun is then called without it
+py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg); // self may be NULL: fun is then called without it
+py_obj_t rt_build_list(int n_args, py_obj_t *items); // items are in reverse order
+py_obj_t rt_build_map(int n_args);
+py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value); // returns map
+py_obj_t rt_build_set(int n_args, py_obj_t *items);
+void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t val);
+py_obj_t rt_load_attr(py_obj_t base, qstr attr);
+void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest); // writes dest[0] (self or NULL) and dest[1] (callable)
diff --git a/py/scope.c b/py/scope.c
new file mode 100644
index 0000000..a715b2b
--- /dev/null
+++ b/py/scope.c
@@ -0,0 +1,218 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "machine.h"
+#include "parse.h"
+#include "scope.h"
+
+// Allocate and initialise a scope of the given kind for parse node pn.
+// Function and class scopes take their name from nodes[0] of pn; the other kinds get a
+// fixed name (module: qstr 0).  All counters, stack_size included, start at 0.
+scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn) {
+    scope_t *scope = m_new(scope_t, 1);
+    scope->kind = kind;
+    scope->parent = NULL;
+    scope->next = NULL;
+    scope->pn = pn;
+    switch (kind) {
+        case SCOPE_MODULE:
+            scope->simple_name = 0;
+            break;
+        case SCOPE_FUNCTION:
+        case SCOPE_CLASS:
+            assert(PY_PARSE_NODE_IS_STRUCT(pn));
+            scope->simple_name = PY_PARSE_NODE_LEAF_ARG(((py_parse_node_struct_t*)pn)->nodes[0]);
+            break;
+        case SCOPE_LAMBDA:
+            scope->simple_name = qstr_from_str_static("<lambda>");
+            break;
+        case SCOPE_LIST_COMP:
+            scope->simple_name = qstr_from_str_static("<listcomp>");
+            break;
+        case SCOPE_DICT_COMP:
+            scope->simple_name = qstr_from_str_static("<dictcomp>");
+            break;
+        case SCOPE_SET_COMP:
+            scope->simple_name = qstr_from_str_static("<setcomp>");
+            break;
+        case SCOPE_GEN_EXPR:
+            scope->simple_name = qstr_from_str_static("<genexpr>");
+            break;
+        default:
+            assert(0);
+    }
+    scope->id_info_alloc = 8;
+    scope->id_info_len = 0;
+    scope->id_info = m_new(id_info_t, scope->id_info_alloc);
+
+    scope->flags = 0;
+    scope->num_params = 0;
+    scope->num_locals = 0;
+    scope->stack_size = 0; // scope_print_info reads this field, so do not leave it uninitialised
+    scope->unique_code_id = 0;
+
+    return scope;
+}
+
+// Return the id_info entry for qstr in scope, creating one if it does not exist yet.
+// *added is set to false when an existing entry is returned and to true when a new
+// one was created.  A new entry gets param == false and kind == 0; its local_num is
+// not set here.  Entries are kept sorted by name (strcmp order).
+// The returned pointer points into scope->id_info, which a later insertion may
+// reallocate (m_renew) or shift.
+id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added) {
+    // an existing entry is returned as is
+    for (int k = 0; k < scope->id_info_len; k++) {
+        id_info_t *existing = &scope->id_info[k];
+        if (existing->qstr == qstr) {
+            *added = false;
+            return existing;
+        }
+    }
+
+    // make sure we have enough memory
+    if (scope->id_info_len >= scope->id_info_alloc) {
+        scope->id_info_alloc *= 2;
+        scope->id_info = m_renew(id_info_t, scope->id_info, scope->id_info_alloc);
+    }
+
+    // sort insert into id_info array, so we are equivalent to CPython (no other reason
+    // to do it): open a gap at the end and move it down past every entry whose name
+    // does not sort before the new one
+    int slot = scope->id_info_len;
+    scope->id_info_len += 1;
+    while (slot > 0 && strcmp(qstr_str(scope->id_info[slot - 1].qstr), qstr_str(qstr)) >= 0) {
+        // move the entry one slot up
+        scope->id_info[slot] = scope->id_info[slot - 1];
+        slot -= 1;
+    }
+
+    // initialise the new entry in its final position
+    id_info_t *id_info = &scope->id_info[slot];
+    id_info->param = false;
+    id_info->kind = 0;
+    id_info->qstr = qstr;
+
+    *added = true;
+    return id_info;
+}
+
+id_info_t *scope_find(scope_t *scope, qstr qstr) { // entry for qstr in this scope only, or NULL
+    for (id_info_t *id = scope->id_info; id < scope->id_info + scope->id_info_len; id++) {
+        if (id->qstr == qstr) {
+            return id;
+        }
+    }
+    return NULL;
+}
+
+// Look qstr up in the outermost scope, found by following the parent links from
+// scope until a scope without a parent is reached.  Returns that scope's entry
+// for qstr, or NULL if it has none.
+id_info_t *scope_find_global(scope_t *scope, qstr qstr) {
+    scope_t *root = scope;
+    while (root->parent != NULL) {
+        root = root->parent;
+    }
+    // same linear search over id_info as scope_find
+    return scope_find(root, qstr);
+}
+
+// Search the scopes enclosing scope, innermost first, for an entry named qstr.
+// The outermost scope (the one without a parent) is not searched.  Returns the
+// first entry found, or NULL.
+id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr) {
+    // the s != NULL test covers a scope that has no parent at all
+    for (scope_t *s = scope->parent; s != NULL && s->parent != NULL; s = s->parent) {
+        id_info_t *hit = scope_find(s, qstr);
+        if (hit != NULL) {
+            return hit;
+        }
+    }
+    return NULL;
+}
+
+// Record that qstr is closed over by scope.  The enclosing scopes are visited from the
+// innermost outwards, the outermost (parent-less) one excluded.  A scope that has no
+// entry for qstr gets one of kind FREE and the walk continues; the first scope that
+// already has an entry ends the walk, and a LOCAL entry there becomes a CELL.
+// Asserts if scope has no parent or if no visited scope has an entry for qstr.
+void scope_close_over_in_parents(scope_t *scope, qstr qstr) {
+    assert(scope->parent != NULL); // we should have at least 1 parent
+    for (scope_t *s = scope->parent; s->parent != NULL; s = s->parent) {
+        id_info_t *id = scope_find(s, qstr);
+        if (id != NULL) {
+            // variable is declared in this scope, so finish
+            switch (id->kind) {
+                case ID_INFO_KIND_LOCAL: id->kind = ID_INFO_KIND_CELL; break; // variable local to this scope, close it over
+                case ID_INFO_KIND_FREE: break; // variable already closed over in a parent scope
+                case ID_INFO_KIND_CELL: break; // variable already closed over in this scope
+                default: assert(0); // TODO
+            }
+            return;
+        }
+
+        // variable not declared in this scope, so declare it as free and keep searching parents
+        bool added;
+        id = scope_find_or_add_id(s, qstr, &added);
+        assert(added);
+        id->kind = ID_INFO_KIND_FREE;
+    }
+
+    assert(0); // we should have found the variable in one of the parents
+}
+
+// Print a summary of scope s to stdout: a "code <name>" line followed by its flags,
+// argument count, number of locals and stack size.  Module, lambda, comprehension
+// and generator-expression scopes print a fixed name; every other kind prints
+// simple_name.
+void scope_print_info(scope_t *s) {
+    switch (s->kind) {
+        case SCOPE_MODULE: printf("code <module>\n"); break;
+        case SCOPE_LAMBDA: printf("code <lambda>\n"); break;
+        case SCOPE_LIST_COMP: printf("code <listcomp>\n"); break;
+        case SCOPE_DICT_COMP: printf("code <dictcomp>\n"); break;
+        case SCOPE_SET_COMP: printf("code <setcomp>\n"); break;
+        case SCOPE_GEN_EXPR: printf("code <genexpr>\n"); break;
+        default: printf("code %s\n", qstr_str(s->simple_name)); break;
+    }
+    // disabled debugging code: lists the identifiers of each kind
+    /*
+    printf("var global:");
+    for (int i = 0; i < s->id_info_len; i++) {
+        if (s->id_info[i].kind == ID_INFO_KIND_GLOBAL_EXPLICIT) {
+            printf(" %s", qstr_str(s->id_info[i].qstr));
+        }
+    }
+    printf("\n");
+    printf("var name:");
+    for (int i = 0; i < s->id_info_len; i++) {
+        if (s->id_info[i].kind == ID_INFO_KIND_GLOBAL_IMPLICIT) {
+            printf(" %s", qstr_str(s->id_info[i].qstr));
+        }
+    }
+    printf("\n");
+    printf("var local:");
+    for (int i = 0; i < s->id_info_len; i++) {
+        if (s->id_info[i].kind == ID_INFO_KIND_LOCAL) {
+            printf(" %s", qstr_str(s->id_info[i].qstr));
+        }
+    }
+    printf("\n");
+    printf("var free:");
+    for (int i = 0; i < s->id_info_len; i++) {
+        if (s->id_info[i].kind == ID_INFO_KIND_FREE) {
+            printf(" %s", qstr_str(s->id_info[i].qstr));
+        }
+    }
+    printf("\n");
+    */
+    // summary fields
+    printf("     flags %04x\n", s->flags);
+    printf("     argcount %d\n", s->num_params);
+    printf("     nlocals %d\n", s->num_locals);
+    printf("     stacksize %d\n", s->stack_size);
+}
diff --git a/py/scope.h b/py/scope.h
new file mode 100644
index 0000000..2d620fb
--- /dev/null
+++ b/py/scope.h
@@ -0,0 +1,58 @@
+enum { // values for id_info_t.kind
+    ID_INFO_KIND_GLOBAL_IMPLICIT, // value 0, which is also what scope_find_or_add_id stores in a new entry
+    ID_INFO_KIND_GLOBAL_EXPLICIT,
+    ID_INFO_KIND_LOCAL, // in a function f, written and only referenced by f
+    ID_INFO_KIND_CELL,  // in a function f, read/written by children of f
+    ID_INFO_KIND_FREE,  // in a function f, belongs to the parent of f
+};
+
+typedef struct _id_info_t { // one identifier known to a scope (element of scope_t.id_info)
+    bool param; // false for entries created by scope_find_or_add_id; presumably marks parameters - confirm
+    int kind; // one of the ID_INFO_KIND_* values
+    qstr qstr; // the identifier's name
+    int local_num; // when it's an ID_INFO_KIND_LOCAL this is the unique number of the local
+} id_info_t;
+
+// single-bit flag values, presumably combined in scope_t.flags; taken from python source, Include/code.h
+#define SCOPE_FLAG_OPTIMISED    0x0001
+#define SCOPE_FLAG_NEWLOCALS    0x0002
+#define SCOPE_FLAG_VARARGS      0x0004
+#define SCOPE_FLAG_VARKEYWORDS  0x0008
+#define SCOPE_FLAG_NESTED       0x0010
+#define SCOPE_FLAG_GENERATOR    0x0020
+/* The SCOPE_FLAG_NOFREE flag is set if there are no free or cell variables.
+   This information is redundant, but it allows a single flag test
+   to determine whether there is any extra work to be done when the
+   call frame is set up.
+*/
+#define SCOPE_FLAG_NOFREE       0x0040
+
+// a scope is a "block" in Python parlance
+typedef enum { SCOPE_MODULE, SCOPE_FUNCTION, SCOPE_LAMBDA, SCOPE_LIST_COMP, SCOPE_DICT_COMP, SCOPE_SET_COMP, SCOPE_GEN_EXPR, SCOPE_CLASS } scope_kind_t;
+typedef struct _scope_t {
+    scope_kind_t kind;
+    struct _scope_t *parent; // enclosing scope; NULL for the outermost scope (and right after scope_new)
+    struct _scope_t *next; // set to NULL by scope_new; presumably links all scopes in a list - confirm
+    py_parse_node_t pn; // parse node this scope was created for
+    qstr simple_name; // name printed by scope_print_info; 0 for a module scope
+    int id_info_alloc; // number of entries allocated in id_info
+    int id_info_len; // number of entries in use in id_info
+    id_info_t *id_info; // identifiers of this scope, kept sorted by name by scope_find_or_add_id
+    int flags; // printed in hex by scope_print_info; presumably SCOPE_FLAG_* bits
+    int num_params;
+    /* not needed
+    int num_default_params;
+    int num_dict_params;
+    */
+    int num_locals;
+    int stack_size; // NOTE(review): not initialised by scope_new
+    int unique_code_id;
+} scope_t;
+
+scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn); // allocate and initialise a scope
+id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added); // *added reports whether a new entry was created
+id_info_t *scope_find(scope_t *scope, qstr qstr); // this scope only; NULL if absent
+id_info_t *scope_find_global(scope_t *scope, qstr qstr); // outermost scope only; NULL if absent
+id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr); // enclosing scopes except the outermost; NULL if absent
+void scope_close_over_in_parents(scope_t *scope, qstr qstr); // asserts unless scope has a parent
+void scope_print_info(scope_t *s); // prints to stdout