py: Properly implement deletion of locals and derefs, and detect errors.

Needed to reinstate 2 delete opcodes (MP_BC_DELETE_FAST and
MP_BC_DELETE_DEREF), to specifically check that a local is not deleted twice.
diff --git a/py/bc0.h b/py/bc0.h
index 83993f8..e6a0e21 100644
--- a/py/bc0.h
+++ b/py/bc0.h
@@ -1,10 +1,6 @@
 // Micro Python byte-codes.
 // The comment at the end of the line (if it exists) tells the arguments to the byte-code.
 
-// TODO Add MP_BC_LOAD_FAST_CHECKED and MP_BC_STORE_FAST_CHECKED for acting on those
-// locals which have del called on them anywhere in the function.
-// UnboundLocalError: local variable '%s' referenced before assignment
-
 #define MP_BC_LOAD_CONST_FALSE   (0x10)
 #define MP_BC_LOAD_CONST_NONE    (0x11)
 #define MP_BC_LOAD_CONST_TRUE    (0x12)
@@ -21,12 +17,13 @@
 #define MP_BC_LOAD_FAST_1        (0x21)
 #define MP_BC_LOAD_FAST_2        (0x22)
 #define MP_BC_LOAD_FAST_N        (0x23) // uint
-#define MP_BC_LOAD_DEREF         (0x24) // uint
-#define MP_BC_LOAD_NAME          (0x25) // qstr
-#define MP_BC_LOAD_GLOBAL        (0x26) // qstr
-#define MP_BC_LOAD_ATTR          (0x27) // qstr
-#define MP_BC_LOAD_METHOD        (0x28) // qstr
-#define MP_BC_LOAD_BUILD_CLASS   (0x29)
+#define MP_BC_LOAD_FAST_CHECKED  (0x24) // uint
+#define MP_BC_LOAD_DEREF         (0x25) // uint
+#define MP_BC_LOAD_NAME          (0x26) // qstr
+#define MP_BC_LOAD_GLOBAL        (0x27) // qstr
+#define MP_BC_LOAD_ATTR          (0x28) // qstr
+#define MP_BC_LOAD_METHOD        (0x29) // qstr
+#define MP_BC_LOAD_BUILD_CLASS   (0x2a)
 
 #define MP_BC_STORE_FAST_0       (0x30)
 #define MP_BC_STORE_FAST_1       (0x31)
@@ -38,8 +35,10 @@
 #define MP_BC_STORE_ATTR         (0x37) // qstr
 #define MP_BC_STORE_SUBSCR       (0x38)
 
-#define MP_BC_DELETE_NAME        (0x39) // qstr
-#define MP_BC_DELETE_GLOBAL      (0x3a) // qstr
+#define MP_BC_DELETE_FAST        (0x39) // uint
+#define MP_BC_DELETE_DEREF       (0x3a) // uint
+#define MP_BC_DELETE_NAME        (0x3b) // qstr
+#define MP_BC_DELETE_GLOBAL      (0x3c) // qstr
 
 #define MP_BC_DUP_TOP            (0x40)
 #define MP_BC_DUP_TOP_TWO        (0x41)
diff --git a/py/compile.c b/py/compile.c
index 49dddb0..3ea6dd3 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -797,7 +797,7 @@
                         EMIT_ARG(load_closure, id->qstr, id->local_num);
 #else
                         // in Micro Python we load closures using LOAD_FAST
-                        EMIT_ARG(load_fast, id->qstr, id->local_num);
+                        EMIT_ARG(load_fast, id->qstr, id->flags, id->local_num);
 #endif
                         nfree += 1;
                     }
@@ -2208,8 +2208,8 @@
         // get first argument to function
         bool found = false;
         for (int i = 0; i < comp->scope_cur->id_info_len; i++) {
-            if (comp->scope_cur->id_info[i].flags && ID_FLAG_IS_PARAM) {
-                EMIT_ARG(load_fast, MP_QSTR_, comp->scope_cur->id_info[i].local_num);
+            if (comp->scope_cur->id_info[i].flags & ID_FLAG_IS_PARAM) {
+                EMIT_ARG(load_fast, MP_QSTR_, comp->scope_cur->id_info[i].flags, comp->scope_cur->id_info[i].local_num);
                 found = true;
                 break;
             }
@@ -2990,7 +2990,7 @@
 #if MICROPY_EMIT_CPYTHON
             EMIT_ARG(load_closure, MP_QSTR___class__, 0); // XXX check this is the correct local num
 #else
-            EMIT_ARG(load_fast, MP_QSTR___class__, id->local_num);
+            EMIT_ARG(load_fast, MP_QSTR___class__, id->flags, id->local_num);
 #endif
         }
         EMIT(return_value);
@@ -3154,7 +3154,7 @@
         if (num_free > 0) {
             for (int i = 0; i < scope->id_info_len; i++) {
                 id_info_t *id = &scope->id_info[i];
-                if (id->kind != ID_INFO_KIND_FREE || (id->flags && ID_FLAG_IS_PARAM)) {
+                if (id->kind != ID_INFO_KIND_FREE || (id->flags & ID_FLAG_IS_PARAM)) {
                     id->local_num += num_free;
                 }
             }
diff --git a/py/emit.h b/py/emit.h
index 47ac045..5ce11af 100644
--- a/py/emit.h
+++ b/py/emit.h
@@ -43,9 +43,9 @@
     void (*load_const_id)(emit_t *emit, qstr qstr);
     void (*load_const_str)(emit_t *emit, qstr qstr, bool bytes);
     void (*load_const_verbatim_str)(emit_t *emit, const char *str); // only needed for emitcpy
-    void (*load_fast)(emit_t *emit, qstr qstr, int local_num);
+    void (*load_fast)(emit_t *emit, qstr qstr, uint id_flags, int local_num);
     void (*load_deref)(emit_t *emit, qstr qstr, int local_num);
-    void (*load_closure)(emit_t *emit, qstr qstr, int local_num);
+    void (*load_closure)(emit_t *emit, qstr qstr, int local_num); // only needed for emitcpy
     void (*load_name)(emit_t *emit, qstr qstr);
     void (*load_global)(emit_t *emit, qstr qstr);
     void (*load_attr)(emit_t *emit, qstr qstr);
diff --git a/py/emitbc.c b/py/emitbc.c
index f16ffce..21d1a61 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -408,14 +408,20 @@
     emit_write_byte_code_byte(emit, MP_BC_LOAD_NULL);
 };
 
-STATIC void emit_bc_load_fast(emit_t *emit, qstr qstr, int local_num) {
+STATIC void emit_bc_load_fast(emit_t *emit, qstr qstr, uint id_flags, int local_num) {
     assert(local_num >= 0);
     emit_bc_pre(emit, 1);
-    switch (local_num) {
-        case 0: emit_write_byte_code_byte(emit, MP_BC_LOAD_FAST_0); break;
-        case 1: emit_write_byte_code_byte(emit, MP_BC_LOAD_FAST_1); break;
-        case 2: emit_write_byte_code_byte(emit, MP_BC_LOAD_FAST_2); break;
-        default: emit_write_byte_code_byte_uint(emit, MP_BC_LOAD_FAST_N, local_num); break;
+    if (id_flags & ID_FLAG_IS_DELETED) {
+        // This local may be deleted, so need to do a checked load.
+        emit_write_byte_code_byte_uint(emit, MP_BC_LOAD_FAST_CHECKED, local_num);
+    } else {
+        // This local is never deleted, so can do a fast, unchecked load.
+        switch (local_num) {
+            case 0: emit_write_byte_code_byte(emit, MP_BC_LOAD_FAST_0); break;
+            case 1: emit_write_byte_code_byte(emit, MP_BC_LOAD_FAST_1); break;
+            case 2: emit_write_byte_code_byte(emit, MP_BC_LOAD_FAST_2); break;
+            default: emit_write_byte_code_byte_uint(emit, MP_BC_LOAD_FAST_N, local_num); break;
+        }
     }
 }
 
@@ -491,13 +497,11 @@
 }
 
 STATIC void emit_bc_delete_fast(emit_t *emit, qstr qstr, int local_num) {
-    emit_bc_load_null(emit);
-    emit_bc_store_fast(emit, qstr, local_num);
+    emit_write_byte_code_byte_uint(emit, MP_BC_DELETE_FAST, local_num);
 }
 
 STATIC void emit_bc_delete_deref(emit_t *emit, qstr qstr, int local_num) {
-    emit_bc_load_null(emit);
-    emit_bc_store_deref(emit, qstr, local_num);
+    emit_write_byte_code_byte_uint(emit, MP_BC_DELETE_DEREF, local_num);
 }
 
 STATIC void emit_bc_delete_name(emit_t *emit, qstr qstr) {
diff --git a/py/emitcommon.c b/py/emitcommon.c
index f749466..30f5ace 100644
--- a/py/emitcommon.c
+++ b/py/emitcommon.c
@@ -25,7 +25,7 @@
     } else if (id->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) {
         EMIT(load_global, qstr);
     } else if (id->kind == ID_INFO_KIND_LOCAL) {
-        EMIT(load_fast, qstr, id->local_num);
+        EMIT(load_fast, qstr, id->flags, id->local_num);
     } else if (id->kind == ID_INFO_KIND_CELL || id->kind == ID_INFO_KIND_FREE) {
         EMIT(load_deref, qstr, id->local_num);
     } else {
diff --git a/py/emitnative.c b/py/emitnative.c
index 600960f..e96b122 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -704,7 +704,7 @@
     assert(0);
 }
 
-STATIC void emit_native_load_fast(emit_t *emit, qstr qstr, int local_num) {
+STATIC void emit_native_load_fast(emit_t *emit, qstr qstr, uint id_flags, int local_num) {
     vtype_kind_t vtype = emit->local_vtype[local_num];
     if (vtype == VTYPE_UNBOUND) {
         printf("ViperTypeError: local %s used before type known\n", qstr_str(qstr));
diff --git a/py/emitpass1.c b/py/emitpass1.c
index d2f7aaa..6088489 100644
--- a/py/emitpass1.c
+++ b/py/emitpass1.c
@@ -91,7 +91,8 @@
 }
 
 STATIC void emit_pass1_delete_id(emit_t *emit, qstr qstr) {
-    get_id_for_modification(emit->scope, qstr);
+    id_info_t *id = get_id_for_modification(emit->scope, qstr);
+    id->flags |= ID_FLAG_IS_DELETED;
 }
 
 const emit_method_table_t emit_pass1_method_table = {
diff --git a/py/scope.h b/py/scope.h
index 68f55e0..07b41fe 100644
--- a/py/scope.h
+++ b/py/scope.h
@@ -8,6 +8,7 @@
 
 enum {
     ID_FLAG_IS_PARAM = 0x01,
+    ID_FLAG_IS_DELETED = 0x02,
 };
 
 typedef struct _id_info_t {
diff --git a/py/showbc.c b/py/showbc.c
index c566204..c1e420f 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -123,6 +123,11 @@
                 printf("LOAD_FAST_N " UINT_FMT, unum);
                 break;
 
+            case MP_BC_LOAD_FAST_CHECKED:
+                DECODE_UINT;
+                printf("LOAD_FAST_CHECKED " UINT_FMT, unum);
+                break;
+
             case MP_BC_LOAD_DEREF:
                 DECODE_UINT;
                 printf("LOAD_DEREF " UINT_FMT, unum);
@@ -193,6 +198,16 @@
                 printf("STORE_SUBSCR");
                 break;
 
+            case MP_BC_DELETE_FAST:
+                DECODE_UINT;
+                printf("DELETE_FAST " UINT_FMT, unum);
+                break;
+
+            case MP_BC_DELETE_DEREF:
+                DECODE_UINT;
+                printf("DELETE_DEREF " UINT_FMT, unum);
+                break;
+
             case MP_BC_DELETE_NAME:
                 DECODE_QSTR;
                 printf("DELETE_NAME %s", qstr_str(qstr));
diff --git a/py/vm.c b/py/vm.c
index 1701044..f8b60ac 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -241,9 +241,23 @@
                         PUSH(fastn[-unum]);
                         break;
 
+                    case MP_BC_LOAD_FAST_CHECKED:
+                        DECODE_UINT;
+                        obj1 = fastn[-unum];
+                        if (obj1 == MP_OBJ_NULL) {
+                            local_name_error:
+                            nlr_raise(mp_obj_new_exception_msg(&mp_type_NameError, "local variable referenced before assignment"));
+                        }
+                        PUSH(obj1);
+                        break;
+
                     case MP_BC_LOAD_DEREF:
                         DECODE_UINT;
-                        PUSH(mp_obj_cell_get(fastn[-unum]));
+                        obj1 = mp_obj_cell_get(fastn[-unum]);
+                        if (obj1 == MP_OBJ_NULL) {
+                            goto local_name_error;
+                        }
+                        PUSH(obj1);
                         break;
 
                     case MP_BC_LOAD_NAME:
@@ -314,6 +328,22 @@
                         sp -= 3;
                         break;
 
+                    case MP_BC_DELETE_FAST:
+                        DECODE_UINT;
+                        if (fastn[-unum] == MP_OBJ_NULL) {
+                            goto local_name_error;
+                        }
+                        fastn[-unum] = MP_OBJ_NULL;
+                        break;
+
+                    case MP_BC_DELETE_DEREF:
+                        DECODE_UINT;
+                        if (mp_obj_cell_get(fastn[-unum]) == MP_OBJ_NULL) {
+                            goto local_name_error;
+                        }
+                        mp_obj_cell_set(fastn[-unum], MP_OBJ_NULL);
+                        break;
+
                     case MP_BC_DELETE_NAME:
                         DECODE_QSTR;
                         mp_delete_name(qst);