py: Fix bug with LOAD_METHOD; fix int->machine_int_t for small int.

LOAD_METHOD bug was: emitbc under-counted the stack usage of a
LOAD_METHOD operation (it recorded a net change of 0 instead of +1).
With the count fixed, the "n_state += 1" workaround in vm.c is removed.

Small int bug was: small ints were being passed as int when they should
have been passed as machine_int_t.
diff --git a/py/compile.c b/py/compile.c
index ebd2abb..c1a7955 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -2501,7 +2501,7 @@
     if (MP_PARSE_NODE_IS_NULL(pn)) {
         // pass
     } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
-        int arg = MP_PARSE_NODE_LEAF_ARG(pn);
+        machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
         switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
             case MP_PARSE_NODE_ID: EMIT_ARG(load_id, arg); break;
             case MP_PARSE_NODE_SMALL_INT: EMIT_ARG(load_const_small_int, arg); break;
@@ -3030,7 +3030,8 @@
         scope->flags |= SCOPE_FLAG_OPTIMISED;
 
         // TODO possibly other ways it can be nested
-        if (scope->parent->kind == SCOPE_FUNCTION || (scope->parent->kind == SCOPE_CLASS && scope->parent->parent->kind == SCOPE_FUNCTION)) {
+        // Note that we don't actually use this information at the moment (for CPython compat only)
+        if ((SCOPE_FUNCTION <= scope->parent->kind && scope->parent->kind <= SCOPE_SET_COMP) || (scope->parent->kind == SCOPE_CLASS && scope->parent->parent->kind == SCOPE_FUNCTION)) {
             scope->flags |= SCOPE_FLAG_NESTED;
         }
     }
diff --git a/py/emit.h b/py/emit.h
index fc5538f..ff33a8f 100644
--- a/py/emit.h
+++ b/py/emit.h
@@ -34,7 +34,7 @@
     void (*import_from)(emit_t *emit, qstr qstr);
     void (*import_star)(emit_t *emit);
     void (*load_const_tok)(emit_t *emit, mp_token_kind_t tok);
-    void (*load_const_small_int)(emit_t *emit, int arg);
+    void (*load_const_small_int)(emit_t *emit, machine_int_t arg);
     void (*load_const_int)(emit_t *emit, qstr qstr);
     void (*load_const_dec)(emit_t *emit, qstr qstr);
     void (*load_const_id)(emit_t *emit, qstr qstr);
diff --git a/py/emitbc.c b/py/emitbc.c
index 9fa2880..6d9523f 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -108,7 +108,7 @@
 }
 
 // integers (for small ints) are stored as 24 bits, in excess
-static void emit_write_byte_code_byte_int(emit_t* emit, byte b1, int num) {
+static void emit_write_byte_code_byte_int(emit_t* emit, byte b1, machine_int_t num) {
     num += 0x800000;
     assert(0 <= num && num <= 0xffffff);
     byte* c = emit_get_cur_to_write_byte_code(emit, 4);
@@ -319,7 +319,7 @@
     }
 }
 
-static void emit_bc_load_const_small_int(emit_t *emit, int arg) {
+static void emit_bc_load_const_small_int(emit_t *emit, machine_int_t arg) {
     emit_pre(emit, 1);
     emit_write_byte_code_byte_int(emit, MP_BC_LOAD_CONST_SMALL_INT, arg);
 }
@@ -390,7 +390,7 @@
 }
 
 static void emit_bc_load_method(emit_t *emit, qstr qstr) {
-    emit_pre(emit, 0);
+    emit_pre(emit, 1);
     emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_METHOD, qstr);
 }
 
@@ -707,7 +707,7 @@
     if (have_dbl_star_arg) {
         s += 1;
     }
-    emit_pre(emit, -n_positional - 2 * n_keyword - s);
+    emit_pre(emit, -1 - n_positional - 2 * n_keyword - s);
     int op;
     if (have_star_arg) {
         if (have_dbl_star_arg) {
diff --git a/py/emitcpy.c b/py/emitcpy.c
index 71861c9..2e5c34c 100644
--- a/py/emitcpy.c
+++ b/py/emitcpy.c
@@ -149,10 +149,10 @@
     }
 }
 
-static void emit_cpy_load_const_small_int(emit_t *emit, int arg) {
+static void emit_cpy_load_const_small_int(emit_t *emit, machine_int_t arg) {
     emit_pre(emit, 1, 3);
     if (emit->pass == PASS_3) {
-        printf("LOAD_CONST %d\n", arg);
+        printf("LOAD_CONST " INT_FMT "\n", arg);
     }
 }
 
diff --git a/py/emitnative.c b/py/emitnative.c
index 258aa9f..1e5ea1f 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -599,7 +599,7 @@
     emit_post_push_imm(emit, vtype, val);
 }
 
-static void emit_native_load_const_small_int(emit_t *emit, int arg) {
+static void emit_native_load_const_small_int(emit_t *emit, machine_int_t arg) {
     emit_pre(emit);
     if (emit->do_viper_types) {
         emit_post_push_imm(emit, VTYPE_INT, arg);
diff --git a/py/parse.c b/py/parse.c
index d9969d6..d0776ce 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -172,15 +172,15 @@
     if (MP_PARSE_NODE_IS_NULL(pn)) {
         printf("NULL\n");
     } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
-        int arg = MP_PARSE_NODE_LEAF_ARG(pn);
+        machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
         switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
             case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
-            case MP_PARSE_NODE_SMALL_INT: printf("int(%d)\n", arg); break;
+            case MP_PARSE_NODE_SMALL_INT: printf("int(" INT_FMT ")\n", arg); break;
             case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break;
             case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break;
             case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
             case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
-            case MP_PARSE_NODE_TOKEN: printf("tok(%d)\n", arg); break;
+            case MP_PARSE_NODE_TOKEN: printf("tok(" INT_FMT ")\n", arg); break;
             default: assert(0);
         }
     } else {
diff --git a/py/runtime.c b/py/runtime.c
index 39f297f..3c97505 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -217,7 +217,7 @@
     //printf("byte code: %d bytes\n", len);
 
 #ifdef DEBUG_PRINT
-    DEBUG_printf("assign byte code: id=%d code=%p len=%u n_args=%d\n", unique_code_id, code, len, n_args);
+    DEBUG_printf("assign byte code: id=%d code=%p len=%u n_args=%d n_locals=%d n_stack=%d\n", unique_code_id, code, len, n_args, n_locals, n_stack);
     for (int i = 0; i < 128 && i < len; i++) {
         if (i > 0 && i % 16 == 0) {
             DEBUG_printf("\n");
diff --git a/py/showbc.c b/py/showbc.c
index f914223..d7ae17c 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -68,12 +68,10 @@
                 printf("LOAD_CONST_INT %s", qstr_str(qstr));
                 break;
 
-                /*
             case MP_BC_LOAD_CONST_DEC:
                 DECODE_QSTR;
-                PUSH(rt_load_const_dec(qstr));
+                printf("LOAD_CONST_DEC %s", qstr_str(qstr));
                 break;
-                */
 
             case MP_BC_LOAD_CONST_ID:
                 DECODE_QSTR;
@@ -351,12 +349,12 @@
 
             case MP_BC_IMPORT_NAME:
                 DECODE_QSTR;
-                printf("IMPORT NAME %s", qstr_str(qstr));
+                printf("IMPORT_NAME %s", qstr_str(qstr));
                 break;
 
             case MP_BC_IMPORT_FROM:
                 DECODE_QSTR;
-                printf("IMPORT NAME %s", qstr_str(qstr));
+                printf("IMPORT_FROM %s", qstr_str(qstr));
                 break;
 
             default:
diff --git a/py/vm.c b/py/vm.c
index 0cc2602..cb4c6a8 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -26,8 +26,6 @@
 #define SET_TOP(val) *sp = (val)
 
 mp_obj_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_args, uint n_state) {
-    n_state += 1; // XXX there is a bug somewhere which doesn't count enough state... (conwaylife and mandel have the bug)
-
     // allocate state for locals and stack
     mp_obj_t temp_state[10];
     mp_obj_t *state = &temp_state[0];
@@ -479,8 +477,7 @@
 
                     case MP_BC_MAKE_CLOSURE:
                         DECODE_UINT;
-                        obj1 = POP();
-                        PUSH(rt_make_closure_from_id(unum, obj1));
+                        SET_TOP(rt_make_closure_from_id(unum, TOP()));
                         break;
 
                     case MP_BC_CALL_FUNCTION: